Diffstat (limited to 'llvm/test')
161 files changed, 22519 insertions, 1724 deletions
diff --git a/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll b/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll
new file mode 100644
index 0000000..1de8ab5
--- /dev/null
+++ b/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll
@@ -0,0 +1,30 @@
+; RUN: opt %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+; BasicAA should prove that loads from sufficiently large static offsets
+; don't overlap with matrix loads with a statically known size.
+
+define <8 x double> @non_overlapping_strided_load(ptr %src) {
+; CHECK-LABEL: Function: non_overlapping_strided_load:
+; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+entry:
+  %src.offset = getelementptr inbounds double, ptr %src, i32 12
+  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+  call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+  ret <8 x double> %l
+}
+
+define <8 x double> @overlapping_strided_load(ptr %src) {
+; CHECK-LABEL: Function: overlapping_strided_load:
+; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+;
+entry:
+  %src.offset = getelementptr inbounds double, ptr %src, i32 11
+  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+  call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+  ret <8 x double> %l
+}
+
+declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
+declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)
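For readers double-checking the offsets above: with 4 rows, 2 columns and a stride of 8 doubles, the column-major store touches element ranges [0,4) and [8,12) of %src, so a same-shaped load based 12 doubles in starts past the last touched element, while a base of 11 doubles still overlaps element 11. A minimal sketch of the column arithmetic, using a hypothetical helper that is not part of the test:

define ptr @column_start(ptr %base, i32 %col) {
; Column %col begins %col * stride (8 doubles) past the base pointer and,
; with 4 rows, spans 4 contiguous doubles from there.
  %elts = mul i32 %col, 8
  %start = getelementptr inbounds double, ptr %base, i32 %elts
  ret ptr %start
}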
diff --git a/llvm/test/Assembler/autoupgrade-invalid-masked-align.ll b/llvm/test/Assembler/autoupgrade-invalid-masked-align.ll
new file mode 100644
index 0000000..458bd2e
--- /dev/null
+++ b/llvm/test/Assembler/autoupgrade-invalid-masked-align.ll
@@ -0,0 +1,49 @@
+; RUN: split-file %s %t
+; RUN: not llvm-as < %t/masked-store.ll 2>&1 | FileCheck %s --check-prefix=MASKED-STORE
+; RUN: not llvm-as < %t/masked-store-zero.ll 2>&1 | FileCheck %s --check-prefix=MASKED-STORE-ZERO
+; RUN: not llvm-as < %t/masked-load.ll 2>&1 | FileCheck %s --check-prefix=MASKED-LOAD
+; RUN: not llvm-as < %t/masked-load-zero.ll 2>&1 | FileCheck %s --check-prefix=MASKED-LOAD-ZERO
+; RUN: not llvm-as < %t/masked-scatter.ll 2>&1 | FileCheck %s --check-prefix=MASKED-SCATTER
+; RUN: not llvm-as < %t/masked-gather.ll 2>&1 | FileCheck %s --check-prefix=MASKED-GATHER
+
+;--- masked-store.ll
+; MASKED-STORE: LLVM ERROR: Invalid alignment argument
+define void @masked_store(ptr %ptr, <2 x i1> %mask, <2 x double> %val) {
+  call void @llvm.masked.store.v2f64.p0(<2 x double> %val, ptr %ptr, i32 3, <2 x i1> %mask)
+  ret void
+}
+
+;--- masked-store-zero.ll
+; MASKED-STORE-ZERO: LLVM ERROR: Invalid zero alignment argument
+define void @masked_store_zero(ptr %ptr, <2 x i1> %mask, <2 x double> %val) {
+  call void @llvm.masked.store.v2f64.p0(<2 x double> %val, ptr %ptr, i32 0, <2 x i1> %mask)
+  ret void
+}
+
+;--- masked-load.ll
+; MASKED-LOAD: LLVM ERROR: Invalid alignment argument
+define void @masked_load(ptr %ptr, <2 x i1> %mask, <2 x double> %val) {
+  call <2 x double> @llvm.masked.load.v2f64.p0(ptr %ptr, i32 3, <2 x i1> %mask, <2 x double> %val)
+  ret void
+}
+
+;--- masked-load-zero.ll
+; MASKED-LOAD-ZERO: LLVM ERROR: Invalid zero alignment argument
+define void @masked_load_zero(ptr %ptr, <2 x i1> %mask, <2 x double> %val) {
+  call <2 x double> @llvm.masked.load.v2f64.p0(ptr %ptr, i32 0, <2 x i1> %mask, <2 x double> %val)
+  ret void
+}
+
+;--- masked-scatter.ll
+; MASKED-SCATTER: LLVM ERROR: Invalid alignment argument
+define void @masked_scatter(<2 x ptr> %ptr, <2 x i1> %mask, <2 x double> %val) {
+  call void @llvm.masked.scatter.v2f64.p0(<2 x double> %val, <2 x ptr> %ptr, i32 3, <2 x i1> %mask)
+  ret void
+}
+
+;--- masked-gather.ll
+; MASKED-GATHER: LLVM ERROR: Invalid alignment argument
+define void @masked_gather(<2 x ptr> %ptr, <2 x i1> %mask, <2 x double> %val) {
+  call <2 x double> @llvm.masked.gather.v2f64.p0(<2 x ptr> %ptr, i32 3, <2 x i1> %mask, <2 x double> %val)
+  ret void
+}
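The alignment operand of these masked intrinsics must be a non-zero power of two; the tests feed it 3 and 0 to exercise both autoupgrade error paths. For contrast, a well-formed call would look like this sketch (hypothetical function, not part of the test):

define void @masked_store_aligned(ptr %ptr, <2 x i1> %mask, <2 x double> %val) {
; 8 is a non-zero power of two, so llvm-as accepts this alignment argument.
  call void @llvm.masked.store.v2f64.p0(<2 x double> %val, ptr %ptr, i32 8, <2 x i1> %mask)
  ret void
}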
diff --git a/llvm/test/Bitcode/upgrade-branch-protection.ll b/llvm/test/Bitcode/upgrade-branch-protection.ll
index 1b33e39..6f60ba5 100644
--- a/llvm/test/Bitcode/upgrade-branch-protection.ll
+++ b/llvm/test/Bitcode/upgrade-branch-protection.ll
@@ -1,8 +1,11 @@
-;; Test that module flags "branch-target-enforcement" and "sign-return-address" can be upgraded to
-;; are upgraded from Error to Min.
+;; Test that module flags "branch-target-enforcement" and "sign-return-address"
+;; are upgraded from Error to Min, and that their value is changed to 2 as the
+;; module is converted to the new semantics.
 
 ; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s
 
+target triple = "aarch64-unknown-linux-gnu"
+
 !llvm.module.flags = !{!0, !1, !2, !3}
 
 !0 = !{i32 1, !"branch-target-enforcement", i32 1}
@@ -10,7 +13,7 @@
 !2 = !{i32 1, !"sign-return-address-all", i32 1}
 !3 = !{i32 1, !"sign-return-address-with-bkey", i32 1}
 
-;CHECK: !0 = !{i32 8, !"branch-target-enforcement", i32 1}
-;CHECK: !1 = !{i32 8, !"sign-return-address", i32 1}
-;CHECK: !2 = !{i32 8, !"sign-return-address-all", i32 1}
-;CHECK: !3 = !{i32 8, !"sign-return-address-with-bkey", i32 1}
\ No newline at end of file
+;CHECK: !0 = !{i32 8, !"branch-target-enforcement", i32 2}
+;CHECK: !1 = !{i32 8, !"sign-return-address", i32 2}
+;CHECK: !2 = !{i32 8, !"sign-return-address-all", i32 2}
+;CHECK: !3 = !{i32 8, !"sign-return-address-with-bkey", i32 2}
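In module-flag metadata the first operand selects the merge behavior; 1 is Error and 8 is Min in Module::ModFlagBehavior. The upgrade above therefore rewrites both the behavior and, per the test's expectations, the value:

; before the upgrade: behavior Error (1), value 1
!0 = !{i32 1, !"branch-target-enforcement", i32 1}
; after the upgrade: behavior Min (8), value 2 under the new encoding
!0 = !{i32 8, !"branch-target-enforcement", i32 2}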
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir
index 97a0417..b040ff2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir
@@ -56,7 +56,7 @@
   }
 
-  attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2" "use-soft-float"="false" }
   attributes #1 = { argmemonly nounwind }
   attributes #2 = { optsize }
   attributes #3 = { minsize }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
index fc4fbac..f24aeae 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
@@ -47,7 +47,7 @@
     ret void
   }
 
-  attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2" "use-soft-float"="false" }
   attributes #1 = { argmemonly nounwind }
 ...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir
index b06cadf..e4d2ca3 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir
@@ -50,7 +50,7 @@
   declare void @llvm.stackprotector(ptr, ptr) #2
 
-  attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2" "use-soft-float"="false" }
   attributes #1 = { argmemonly nounwind }
 ...
diff --git a/llvm/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll b/llvm/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
index 0c1776e..6e3682a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
@@ -37,7 +37,7 @@ for.body: ; preds = %for.body, %entry
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll b/llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
index f2ed57e..353e818 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
@@ -325,7 +325,7 @@ entry:
 
 declare void @hhh(double, double)
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
index 7e97116..8da0e11 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -694,8 +694,8 @@ bb1:
 ; CHECK: .[[LABEL]]:
 ; CHECK: ret
 
-attributes #0 = { "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !1 = !{!2, !2, i64 0}
 !2 = !{!"int", !3, i64 0}
"less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } + attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } attributes #2 = { nounwind readnone speculatable } attributes #3 = { nounwind } diff --git a/llvm/test/CodeGen/AArch64/arm64-detect-vec-redux.ll b/llvm/test/CodeGen/AArch64/arm64-detect-vec-redux.ll index 4e86f52..071344d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-detect-vec-redux.ll +++ b/llvm/test/CodeGen/AArch64/arm64-detect-vec-redux.ll @@ -47,6 +47,6 @@ declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) #1 ; Function Attrs: nounwind readnone declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) #1 -attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } diff --git a/llvm/test/CodeGen/AArch64/arm64-fma-combine-with-fpfusion.ll b/llvm/test/CodeGen/AArch64/arm64-fma-combine-with-fpfusion.ll index 9b3d539..0ddcdcc 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fma-combine-with-fpfusion.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fma-combine-with-fpfusion.ll @@ -8,5 +8,5 @@ define float @mul_add(float %a, float %b, float %c) local_unnamed_addr #0 { ret float %add } -attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll b/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll index e17a0a9..54f752e 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 -enable-unsafe-fp-math 
diff --git a/llvm/test/CodeGen/AArch64/arm64-detect-vec-redux.ll b/llvm/test/CodeGen/AArch64/arm64-detect-vec-redux.ll
index 4e86f52..071344d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-detect-vec-redux.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-detect-vec-redux.ll
@@ -47,6 +47,6 @@ declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) #1
 ; Function Attrs: nounwind readnone
 declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) #1
 
-attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/arm64-fma-combine-with-fpfusion.ll b/llvm/test/CodeGen/AArch64/arm64-fma-combine-with-fpfusion.ll
index 9b3d539..0ddcdcc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fma-combine-with-fpfusion.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fma-combine-with-fpfusion.ll
@@ -8,5 +8,5 @@ define float @mul_add(float %a, float %b, float %c) local_unnamed_addr #0 {
   ret float %add
 }
 
-attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll b/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll
index e17a0a9..54f752e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 -enable-unsafe-fp-math -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 -verify-machineinstrs | FileCheck %s
 
 define void @foo_2d(ptr %src) {
 ; CHECK-LABEL: %entry
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
index d2ce7e6..41f57bf 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
@@ -84,7 +84,7 @@ bb3: ; preds = %bb3, %bb
 ; Function Attrs: nounwind readnone
 declare i64 @llvm.objectsize.i64.p0(ptr, i1) #1
 
-attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !1 = !{!2, !2, i64 0}
diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index 0b22fa4..c2b2c1e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -1654,24 +1654,14 @@ define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(ptr %addr) {
 }
 
 define <8 x i8> @dup_ld1_from_stack(ptr %__ret) {
-; CHECK-SD-LABEL: dup_ld1_from_stack:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    sub sp, sp, #16
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-SD-NEXT:    add x8, sp, #15
-; CHECK-SD-NEXT:    ld1r.8b { v0 }, [x8]
-; CHECK-SD-NEXT:    add sp, sp, #16
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: dup_ld1_from_stack:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT:    .cfi_offset w29, -16
-; CHECK-GI-NEXT:    add x8, sp, #15
-; CHECK-GI-NEXT:    ld1r.8b { v0 }, [x8]
-; CHECK-GI-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: dup_ld1_from_stack:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    add x8, sp, #15
+; CHECK-NEXT:    ld1r.8b { v0 }, [x8]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
 entry:
   %item = alloca i8, align 1
   %0 = load i8, ptr %item, align 1
diff --git a/llvm/test/CodeGen/AArch64/arm64-misched-basic-A53.ll b/llvm/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
index 4cdc6cc..c6cf240 100644
--- a/llvm/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
@@ -107,7 +107,7 @@ define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) {
 ; Function Attrs: nounwind
 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-misched-basic-A57.ll b/llvm/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
index 82b34ef..bb1a6b0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
@@ -108,5 +108,5 @@ for.end: ; preds = %for.cond
 ; Function Attrs: nounwind
 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/arm64-rounding.ll b/llvm/test/CodeGen/AArch64/arm64-rounding.ll
index d487aab..3ce35bf 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rounding.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rounding.ll
@@ -201,4 +201,4 @@ entry:
 }
 
 attributes #0 = { nounwind }
-attributes #1 = { nounwind "unsafe-fp-math"="true" }
+attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/arm64-storebytesmerge.ll b/llvm/test/CodeGen/AArch64/arm64-storebytesmerge.ll
index db65fdd..1486b3a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-storebytesmerge.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-storebytesmerge.ll
@@ -36,6 +36,6 @@ for.end705.i: ; preds = %for.body453.i
 
 declare void @f() local_unnamed_addr #1
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+fp-armv8,+neon" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+fp-armv8,+neon" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+fp-armv8,+neon" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+fp-armv8,+neon" "use-soft-float"="false" }
 attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
index fc59350..593d629 100644
--- a/llvm/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
@@ -18,7 +18,7 @@ entry:
   ret i32 %1
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.ident = !{!0}
diff --git a/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll b/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll
index 2e3b99f..c4bf7d2 100644
--- a/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll
+++ b/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll
@@ -61,4 +61,4 @@ declare dso_local void @e(...) local_unnamed_addr #0
 
 declare dso_local i64 @llvm.aarch64.space(i32, i64) local_unnamed_addr #0
 
-attributes #0 = { nounwind "branch-target-enforcement" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "branch-target-enforcement" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+v8.5a" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/AArch64/consthoist-gep.ll b/llvm/test/CodeGen/AArch64/consthoist-gep.ll
index 031ee35..7d2aaec 100644
--- a/llvm/test/CodeGen/AArch64/consthoist-gep.ll
+++ b/llvm/test/CodeGen/AArch64/consthoist-gep.ll
@@ -108,7 +108,7 @@ bb19: ; preds = %bb3, %bb
   ret void
 }
 
-attributes #0 = { norecurse nounwind optsize ssp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind optsize ssp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.module.flags = !{!0, !1}
 !llvm.ident = !{!2}
diff --git a/llvm/test/CodeGen/AArch64/csel-zero-float.ll b/llvm/test/CodeGen/AArch64/csel-zero-float.ll
index 6edde13..56a33cc 100644
--- a/llvm/test/CodeGen/AArch64/csel-zero-float.ll
+++ b/llvm/test/CodeGen/AArch64/csel-zero-float.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -enable-unsafe-fp-math < %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s
 
 ; There is no invocation to FileCheck as this
 ; caused a crash in "Post-RA pseudo instruction expansion"
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-invaraints.ll b/llvm/test/CodeGen/AArch64/dag-combine-invaraints.ll
index 61df396..e561481 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-invaraints.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-invaraints.ll
@@ -32,5 +32,5 @@ main_:
 
 declare i32 @printf(ptr, ...) #1
 
-attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll b/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll
index 1a83930..9193025 100644
--- a/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll
@@ -2,8 +2,8 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
 
 ; load zero-extended i32, bitcast to f64
-define double @_Z9load_u64_from_u32_testPj(ptr %n){
-; CHECK-LABEL: _Z9load_u64_from_u32_testPj:
+define double @load_u64_from_u32(ptr %n){
+; CHECK-LABEL: load_u64_from_u32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr s0, [x0]
 ; CHECK-NEXT:    ret
@@ -15,8 +15,8 @@ entry:
 }
 
 ; load zero-extended i16, bitcast to f64
-define double @_Z9load_u64_from_u16_testPj(ptr %n){
-; CHECK-LABEL: _Z9load_u64_from_u16_testPj:
+define double @load_u64_from_u16(ptr %n){
+; CHECK-LABEL: load_u64_from_u16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr h0, [x0]
 ; CHECK-NEXT:    ret
@@ -28,8 +28,8 @@ entry:
 }
 
 ; load zero-extended i8, bitcast to f64
-define double @_Z16load_u64_from_u8Ph(ptr %n){
-; CHECK-LABEL: _Z16load_u64_from_u8Ph:
+define double @load_u64_from_u8(ptr %n){
+; CHECK-LABEL: load_u64_from_u8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr b0, [x0]
 ; CHECK-NEXT:    ret
@@ -41,8 +41,8 @@ entry:
 }
 
 ; load zero-extended i16, bitcast to f32
-define float @_Z17load_u32_from_u16Pt(ptr %n){
-; CHECK-LABEL: _Z17load_u32_from_u16Pt:
+define float @load_u32_from_u16(ptr %n){
+; CHECK-LABEL: load_u32_from_u16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr h0, [x0]
 ; CHECK-NEXT:    ret
@@ -54,8 +54,8 @@ entry:
 }
 
 ; load zero-extended i8, bitcast to f32
-define float @_Z16load_u32_from_u8Ph(ptr %n){
-; CHECK-LABEL: _Z16load_u32_from_u8Ph:
+define float @load_u32_from_u8(ptr %n){
+; CHECK-LABEL: load_u32_from_u8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr b0, [x0]
 ; CHECK-NEXT:    ret
@@ -67,8 +67,8 @@ entry:
 }
 
 ; load zero-extended i8, bitcast to f16
-define half @_Z16load_u16_from_u8Ph(ptr %n){
-; CHECK-LABEL: _Z16load_u16_from_u8Ph:
+define half @load_u16_from_u8(ptr %n){
+; CHECK-LABEL: load_u16_from_u8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr b0, [x0]
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
@@ -80,3 +80,504 @@ entry:
   ret half %1
 }
+
+define double @load_u64_from_u32_off1(ptr %n){
+; CHECK-LABEL: load_u64_from_u32_off1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldur w8, [x0, #1]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 1
+  %0 = load i32, ptr %p, align 4
+  %conv = zext i32 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define double @load_u64_from_u16_off1(ptr %n){
+; CHECK-LABEL: load_u64_from_u16_off1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldurh w8, [x0, #1]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 1
+  %0 = load i16, ptr %p, align 2
+  %conv = zext i16 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define double @load_u64_from_u8_off1(ptr %n){
+; CHECK-LABEL: load_u64_from_u8_off1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrb w8, [x0, #1]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 1
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define float @load_u32_from_u16_off1(ptr %n){
+; CHECK-LABEL: load_u32_from_u16_off1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldurh w8, [x0, #1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 1
+  %0 = load i16, ptr %p, align 2
+  %conv = zext i16 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+define float @load_u32_from_u8_off1(ptr %n){
+; CHECK-LABEL: load_u32_from_u8_off1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrb w8, [x0, #1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 1
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+define half @load_u16_from_u8_off1(ptr %n){
+; CHECK-LABEL: load_u16_from_u8_off1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrb w8, [x0, #1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 1
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i16
+  %1 = bitcast i16 %conv to half
+  ret half %1
+}
+
+
+define double @load_u64_from_u32_off2(ptr %n){
+; CHECK-LABEL: load_u64_from_u32_off2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldur w8, [x0, #2]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 2
+  %0 = load i32, ptr %p, align 4
+  %conv = zext i32 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define double @load_u64_from_u16_off2(ptr %n){
+; CHECK-LABEL: load_u64_from_u16_off2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrh w8, [x0, #2]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 2
+  %0 = load i16, ptr %p, align 2
+  %conv = zext i16 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define double @load_u64_from_u8_off2(ptr %n){
+; CHECK-LABEL: load_u64_from_u8_off2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrb w8, [x0, #2]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 2
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define float @load_u32_from_u16_off2(ptr %n){
+; CHECK-LABEL: load_u32_from_u16_off2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr h0, [x0, #2]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 2
+  %0 = load i16, ptr %p, align 2
+  %conv = zext i16 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+define float @load_u32_from_u8_off2(ptr %n){
+; CHECK-LABEL: load_u32_from_u8_off2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr b0, [x0, #2]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 2
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+define half @load_u16_from_u8_off2(ptr %n){
+; CHECK-LABEL: load_u16_from_u8_off2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr b0, [x0, #2]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 2
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i16
+  %1 = bitcast i16 %conv to half
+  ret half %1
+}
+
+
+define double @load_u64_from_u32_off255(ptr %n){
+; CHECK-LABEL: load_u64_from_u32_off255:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldur w8, [x0, #255]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 255
+  %0 = load i32, ptr %p, align 4
+  %conv = zext i32 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define double @load_u64_from_u16_off255(ptr %n){
+; CHECK-LABEL: load_u64_from_u16_off255:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldurh w8, [x0, #255]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 255
+  %0 = load i16, ptr %p, align 2
+  %conv = zext i16 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define double @load_u64_from_u8_off255(ptr %n){
+; CHECK-LABEL: load_u64_from_u8_off255:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrb w8, [x0, #255]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 255
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define float @load_u32_from_u16_off255(ptr %n){
+; CHECK-LABEL: load_u32_from_u16_off255:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldurh w8, [x0, #255]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 255
+  %0 = load i16, ptr %p, align 2
+  %conv = zext i16 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+define float @load_u32_from_u8_off255(ptr %n){
+; CHECK-LABEL: load_u32_from_u8_off255:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrb w8, [x0, #255]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 255
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+define half @load_u16_from_u8_off255(ptr %n){
+; CHECK-LABEL: load_u16_from_u8_off255:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrb w8, [x0, #255]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 255
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i16
+  %1 = bitcast i16 %conv to half
+  ret half %1
+}
+
+
+define double @load_u64_from_u32_off256(ptr %n){
+; CHECK-LABEL: load_u64_from_u32_off256:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr s0, [x0, #256]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 256
+  %0 = load i32, ptr %p, align 4
+  %conv = zext i32 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define double @load_u64_from_u16_off256(ptr %n){
+; CHECK-LABEL: load_u64_from_u16_off256:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr h0, [x0, #256]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 256
+  %0 = load i16, ptr %p, align 2
+  %conv = zext i16 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define double @load_u64_from_u8_off256(ptr %n){
+; CHECK-LABEL: load_u64_from_u8_off256:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr b0, [x0, #256]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 256
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define float @load_u32_from_u16_off256(ptr %n){
+; CHECK-LABEL: load_u32_from_u16_off256:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr h0, [x0, #256]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 256
+  %0 = load i16, ptr %p, align 2
+  %conv = zext i16 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+define float @load_u32_from_u8_off256(ptr %n){
+; CHECK-LABEL: load_u32_from_u8_off256:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr b0, [x0, #256]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 256
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+define half @load_u16_from_u8_off256(ptr %n){
+; CHECK-LABEL: load_u16_from_u8_off256:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr b0, [x0, #256]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 256
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i16
+  %1 = bitcast i16 %conv to half
+  ret half %1
+}
+
+
+define double @load_u64_from_u32_offn(ptr %n){
+; CHECK-LABEL: load_u64_from_u32_offn:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr s0, [x0, #16380]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 16380
+  %0 = load i32, ptr %p, align 4
+  %conv = zext i32 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define double @load_u64_from_u16_offn(ptr %n){
+; CHECK-LABEL: load_u64_from_u16_offn:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #8190 // =0x1ffe
+; CHECK-NEXT:    ldr h0, [x0, x8]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 8190
+  %0 = load i16, ptr %p, align 2
+  %conv = zext i16 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define double @load_u64_from_u8_offn(ptr %n){
+; CHECK-LABEL: load_u64_from_u8_offn:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr b0, [x0, #4095]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 4095
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define float @load_u32_from_u16_offn(ptr %n){
+; CHECK-LABEL: load_u32_from_u16_offn:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr h0, [x0, #8190]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 8190
+  %0 = load i16, ptr %p, align 2
+  %conv = zext i16 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+define float @load_u32_from_u8_offn(ptr %n){
+; CHECK-LABEL: load_u32_from_u8_offn:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr b0, [x0, #4095]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 4095
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+define half @load_u16_from_u8_offn(ptr %n){
+; CHECK-LABEL: load_u16_from_u8_offn:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr b0, [x0, #4095]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 4095
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i16
+  %1 = bitcast i16 %conv to half
+  ret half %1
+}
+
+
+define double @load_u64_from_u32_offnp1(ptr %n){
+; CHECK-LABEL: load_u64_from_u32_offnp1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add x8, x0, #4, lsl #12 // =16384
+; CHECK-NEXT:    ldr s0, [x8]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 16384
+  %0 = load i32, ptr %p, align 4
+  %conv = zext i32 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define double @load_u64_from_u16_offnp1(ptr %n){
+; CHECK-LABEL: load_u64_from_u16_offnp1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add x8, x0, #2, lsl #12 // =8192
+; CHECK-NEXT:    ldr h0, [x8]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 8192
+  %0 = load i16, ptr %p, align 2
+  %conv = zext i16 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define double @load_u64_from_u8_offnp1(ptr %n){
+; CHECK-LABEL: load_u64_from_u8_offnp1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add x8, x0, #1, lsl #12 // =4096
+; CHECK-NEXT:    ldr b0, [x8]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 4096
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+define float @load_u32_from_u16_offnp1(ptr %n){
+; CHECK-LABEL: load_u32_from_u16_offnp1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add x8, x0, #2, lsl #12 // =8192
+; CHECK-NEXT:    ldr h0, [x8]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 8192
+  %0 = load i16, ptr %p, align 2
+  %conv = zext i16 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+define float @load_u32_from_u8_offnp1(ptr %n){
+; CHECK-LABEL: load_u32_from_u8_offnp1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add x8, x0, #1, lsl #12 // =4096
+; CHECK-NEXT:    ldr b0, [x8]
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 4096
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+define half @load_u16_from_u8_offnp1(ptr %n){
+; CHECK-LABEL: load_u16_from_u8_offnp1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add x8, x0, #1, lsl #12 // =4096
+; CHECK-NEXT:    ldr b0, [x8]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; CHECK-NEXT:    ret
+entry:
+  %p = getelementptr i8, ptr %n, i64 4096
+  %0 = load i8, ptr %p, align 1
+  %conv = zext i8 %0 to i16
+  %1 = bitcast i16 %conv to half
+  ret half %1
+}
+
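The expected sequences in this test follow the usual AArch64 addressing-mode limits: a scaled ldr immediate must be a non-negative multiple of the access size up to 4095 units (4095 bytes for b, 8190 for h, 16380 for s), the unscaled ldur forms cover byte offsets in [-256, 255], and larger or misaligned offsets force the address to be materialized first. Summarized from the checks above:

ldr  h0, [x0, #8190]        // scaled form: 8190 = 4095 * 2
ldur w8, [x0, #255]         // unscaled form: any byte offset in [-256, 255]
add  x8, x0, #2, lsl #12    // 8192 exceeds the scaled range, so the
ldr  h0, [x8]               // address is materialized and then loaded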
-attributes #0 = { nounwind "unsafe-fp-math"="true" } -attributes #1 = { nounwind "unsafe-fp-math"="true" "reciprocal-estimates"="div,vec-div" } +attributes #0 = { nounwind } +attributes #1 = { nounwind "reciprocal-estimates"="div,vec-div" } diff --git a/llvm/test/CodeGen/AArch64/shrink-wrap-const-pool-access.mir b/llvm/test/CodeGen/AArch64/shrink-wrap-const-pool-access.mir new file mode 100644 index 0000000..6f33a75 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/shrink-wrap-const-pool-access.mir @@ -0,0 +1,76 @@ +# RUN: llc -mtriple=aarch64 -simplify-mir -run-pass=shrink-wrap -o - %s | FileCheck %s +--- | + declare double @foo() + + define double @shrink_wrap_load_from_const_pool(double %q) { + entry: + %0 = fcmp oeq double %q, 3.125500e+02 + br i1 %0, label %common.ret, label %if.else + + common.ret: ; preds = %if.else, %entry, %exit1 + %common.ret.op = phi double [ %3, %exit1 ], [ 0.000000e+00, %entry ], [ 0.000000e+00, %if.else ] + ret double %common.ret.op + + if.else: ; preds = %entry + %1 = call double @foo() + %2 = fcmp oeq double %1, 0.000000e+00 + br i1 %2, label %exit1, label %common.ret + + exit1: ; preds = %if.else + %3 = call double @foo() + br label %common.ret + } +... +# Following code has a load from constant pool. Accessing constant pool +# must not be considered as a stack access and hence, shrink wrapping must +# happen. +# CHECK-LABEL:name: shrink_wrap_load_from_const_pool +# CHECK: savePoint: +# CHECK: - point: '%bb.3' +# CHECK: restorePoint: +# CHECK: - point: '%bb.5' +--- +name: shrink_wrap_load_from_const_pool +tracksRegLiveness: true +constants: + - id: 0 + value: 'double 3.125500e+02' + alignment: 8 +body: | + bb.0.entry: + successors: %bb.4(0x50000000), %bb.2(0x30000000) + liveins: $d0 + + renamable $d1 = COPY $d0 + renamable $x8 = ADRP target-flags(aarch64-page) %const.0 + renamable $d2 = LDRDui killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s64) from constant-pool) + renamable $d0 = FMOVD0 + nofpexcept FCMPDrr killed renamable $d1, killed renamable $d2, implicit-def $nzcv, implicit $fpcr + Bcc 1, %bb.2, implicit killed $nzcv + + bb.4: + liveins: $d0 + + bb.1.common.ret: + liveins: $d0 + + RET_ReallyLR implicit $d0 + + bb.2.if.else: + successors: %bb.3(0x50000000), %bb.1(0x30000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $d0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + renamable $d1 = COPY $d0 + renamable $d0 = FMOVD0 + nofpexcept FCMPDri killed renamable $d1, implicit-def $nzcv, implicit $fpcr + Bcc 1, %bb.1, implicit killed $nzcv + B %bb.3 + + bb.3.exit1: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $d0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.1 +... 
diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-ex-1.ll b/llvm/test/CodeGen/AArch64/stack-tagging-ex-1.ll
index 66ac04e..22abb8c 100644
--- a/llvm/test/CodeGen/AArch64/stack-tagging-ex-1.ll
+++ b/llvm/test/CodeGen/AArch64/stack-tagging-ex-1.ll
@@ -64,6 +64,6 @@ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
 ; Function Attrs: argmemonly nounwind willreturn
 declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
 
-attributes #0 = { sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind willreturn }
 attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll b/llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll
index e5725bc..d689a76 100644
--- a/llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll
+++ b/llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll
@@ -158,10 +158,10 @@ eh.resume: ; preds = %lpad.body
   resume { ptr, i32 } %eh.lpad-body
 }
 
-attributes #0 = { noreturn sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noreturn sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind willreturn }
 attributes #2 = { nounwind readnone }
-attributes #3 = { norecurse sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { norecurse sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "use-soft-float"="false" }
 attributes #4 = { nounwind }
 attributes #5 = { noreturn }
"less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "use-soft-float"="false" } attributes #4 = { nounwind } attributes #5 = { noreturn } diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-untag-placement.ll b/llvm/test/CodeGen/AArch64/stack-tagging-untag-placement.ll index 91adf82..7483622 100644 --- a/llvm/test/CodeGen/AArch64/stack-tagging-untag-placement.ll +++ b/llvm/test/CodeGen/AArch64/stack-tagging-untag-placement.ll @@ -77,6 +77,6 @@ declare void @llvm.lifetime.start.p0(ptr nocapture) #1 declare void @llvm.lifetime.end.p0(ptr nocapture) #1 -attributes #0 = { sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/AArch64/stack_guard_remat.ll b/llvm/test/CodeGen/AArch64/stack_guard_remat.ll index 523eda61..e41d82c 100644 --- a/llvm/test/CodeGen/AArch64/stack_guard_remat.ll +++ b/llvm/test/CodeGen/AArch64/stack_guard_remat.ll @@ -54,7 +54,7 @@ declare void @foo3(ptr) ; Function Attrs: nounwind declare void @llvm.lifetime.end.p0(i64, ptr nocapture) -attributes #0 = { nounwind sspstrong "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind sspstrong "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } ;--- pic.ll !llvm.module.flags = !{!0} diff --git a/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll b/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll index f78fcea..b8dcd6f 100644 --- a/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll +++ b/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple aarch64-none-linux-gnu -enable-unsafe-fp-math -mattr=+fullfp16 < %s | FileCheck %s +; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+fullfp16 < %s | FileCheck %s define half @scvtf_f16_2(i32 %state) { ; CHECK-LABEL: scvtf_f16_2: diff --git a/llvm/test/CodeGen/AArch64/vector_merge_dep_check.ll b/llvm/test/CodeGen/AArch64/vector_merge_dep_check.ll index 623ea22..89b3b89 100644 --- a/llvm/test/CodeGen/AArch64/vector_merge_dep_check.ll +++ 
b/llvm/test/CodeGen/AArch64/vector_merge_dep_check.ll @@ -24,7 +24,7 @@ define void @fn(ptr %argA, ptr %argB, ptr %a) #0 align 2 { ; CHECK: ret -attributes #0 = { noinline norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-features"="+crc,+crypto,+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { noinline norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-features"="+crc,+crypto,+neon" "use-soft-float"="false" } !llvm.ident = !{!0} diff --git a/llvm/test/CodeGen/AArch64/wineh-frame5.mir b/llvm/test/CodeGen/AArch64/wineh-frame5.mir index 97c5c85..32580f4 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame5.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame5.mir @@ -64,9 +64,9 @@ ; Function Attrs: nounwind declare void @llvm.stackprotector(ptr, ptr) #3 - attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } attributes #1 = { argmemonly nounwind } - attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } attributes #3 = { nounwind } ... 
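Review note: the attribute churn in the stack-tagging and wineh tests above is mechanical — with the global unsafe-fp-math mode gone, the "unsafe-fp-math"="false" string attribute is simply dropped from the attribute lists. Tests that actually want value-unsafe FP transforms are expected to encode them per instruction instead. A minimal, purely illustrative sketch of the flag-based form (the function name is invented; this is not part of the patch):

; Instruction-level fast-math flags replace the old function-wide attribute.
define float @recip_sqrt(float %x) {
  %s = call fast float @llvm.sqrt.f32(float %x) ; 'fast' sets all FMF bits
  %r = fdiv fast float 1.0, %s                  ; eligible for rsqrt-style folds
  ret float %r
}
declare float @llvm.sqrt.f32(float)
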
diff --git a/llvm/test/CodeGen/AArch64/wineh-frame6.mir b/llvm/test/CodeGen/AArch64/wineh-frame6.mir index 5ba7842..d76fae1 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame6.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame6.mir @@ -47,8 +47,8 @@ ; Function Attrs: nounwind declare void @llvm.stackprotector(ptr, ptr) #2 - attributes #0 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #0 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } + attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } attributes #2 = { nounwind } ... 
diff --git a/llvm/test/CodeGen/AArch64/wineh-frame7.mir b/llvm/test/CodeGen/AArch64/wineh-frame7.mir index 1599098..d4e71d9 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame7.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame7.mir @@ -71,8 +71,8 @@ ; Function Attrs: nounwind declare void @llvm.stackprotector(ptr, ptr) #2 - attributes #0 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #0 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } + attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } attributes #2 = { nounwind } ... diff --git a/llvm/test/CodeGen/AArch64/wineh-frame8.mir b/llvm/test/CodeGen/AArch64/wineh-frame8.mir index 9de99ac..56f92f2 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame8.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame8.mir @@ -29,7 +29,7 @@ ret i32 %add } - attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } ... 
--- diff --git a/llvm/test/CodeGen/AArch64/wineh5.mir b/llvm/test/CodeGen/AArch64/wineh5.mir index efdd4b0..1c09b78 100644 --- a/llvm/test/CodeGen/AArch64/wineh5.mir +++ b/llvm/test/CodeGen/AArch64/wineh5.mir @@ -73,8 +73,8 @@ ; Function Attrs: nounwind declare void @llvm.stackprotector(ptr, ptr) #2 - attributes #0 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #0 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } + attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } attributes #2 = { nounwind } !llvm.module.flags = !{!0} diff --git a/llvm/test/CodeGen/AArch64/wineh_shrinkwrap.mir b/llvm/test/CodeGen/AArch64/wineh_shrinkwrap.mir index 2f631c2..52d0dff 100644 --- a/llvm/test/CodeGen/AArch64/wineh_shrinkwrap.mir +++ b/llvm/test/CodeGen/AArch64/wineh_shrinkwrap.mir @@ -56,9 +56,9 @@ ; Function Attrs: nounwind declare void @llvm.stackprotector(ptr, ptr) #3 - attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } attributes #1 = { argmemonly nounwind } - attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" 
"target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "use-soft-float"="false" } attributes #3 = { nounwind } !llvm.module.flags = !{!0} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir index d9ac9a7..de1bb47 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s # Test that we fold correct element from G_UNMERGE_VALUES into fma diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir index 52b1beb..91f2f6f1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GFX10 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-unsafe-fp-math -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GFX11 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GFX10 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GFX11 --- name: fract_f64_neg diff --git a/llvm/test/CodeGen/AMDGPU/add-max.ll b/llvm/test/CodeGen/AMDGPU/add-max.ll index 00c6656..b3a7057 100644 --- a/llvm/test/CodeGen/AMDGPU/add-max.ll +++ b/llvm/test/CodeGen/AMDGPU/add-max.ll @@ -5,7 +5,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) { ; GCN-LABEL: add_max_u32_vvv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_u32_e64 v0, v0, v1, v2 +; GCN-NEXT: v_add_max_u32 v0, v0, v1, v2 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umax.i32(i32 %add, i32 %c) @@ -16,7 +16,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) { define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) { ; GCN-LABEL: add_max_u32_svv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, v1 +; GCN-NEXT: v_add_max_u32 v0, s0, v0, v1 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umax.i32(i32 %add, i32 %c) @@ -27,7 +27,7 @@ define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) { define amdgpu_ps float @add_max_u32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) { ; SDAG-LABEL: add_max_u32_ssv: ; SDAG: ; %bb.0: -; SDAG-NEXT: v_add_max_u32_e64 v0, s0, s1, v0 +; SDAG-NEXT: v_add_max_u32 v0, s0, s1, v0 ; 
SDAG-NEXT: ; return to shader part epilog ; ; GISEL-LABEL: add_max_u32_ssv: @@ -59,7 +59,7 @@ define amdgpu_ps float @add_max_u32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) { ; GCN-LABEL: add_max_u32_vsi: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_u32_e64 v0, v0, s0, 4 +; GCN-NEXT: v_add_max_u32 v0, v0, s0, 4 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umax.i32(i32 %add, i32 4) @@ -70,7 +70,7 @@ define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) { define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) { ; GCN-LABEL: add_max_u32_svl: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, 0x64 +; GCN-NEXT: v_add_max_u32 v0, s0, v0, 0x64 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umax.i32(i32 %add, i32 100) @@ -81,7 +81,7 @@ define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) { define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) { ; SDAG-LABEL: add_max_u32_slv: ; SDAG: ; %bb.0: -; SDAG-NEXT: v_add_max_u32_e64 v0, 0x64, s0, v0 +; SDAG-NEXT: v_add_max_u32 v0, 0x64, s0, v0 ; SDAG-NEXT: ; return to shader part epilog ; ; GISEL-LABEL: add_max_u32_slv: @@ -99,7 +99,7 @@ define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) { define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) { ; GCN-LABEL: add_max_i32_vvv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_i32_e64 v0, v0, v1, v2 +; GCN-NEXT: v_add_max_i32 v0, v0, v1, v2 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.smax.i32(i32 %add, i32 %c) @@ -110,7 +110,7 @@ define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) { define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) { ; GCN-LABEL: add_min_u32_vvv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_min_u32_e64 v0, v0, v1, v2 +; GCN-NEXT: v_add_min_u32 v0, v0, v1, v2 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umin.i32(i32 %add, i32 %c) @@ -121,7 +121,7 @@ define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) { define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) { ; GCN-LABEL: add_min_i32_vvv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_min_i32_e64 v0, v0, v1, v2 +; GCN-NEXT: v_add_min_i32 v0, v0, v1, v2 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.smin.i32(i32 %add, i32 %c) diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 7ee0015f..711d57b 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -39137,7 +39137,7 @@ define bfloat @v_sitofp_i64_to_bf16(i64 %x) { ; GFX1250-NEXT: v_ashrrev_i32_e32 v2, 31, v2 ; GFX1250-NEXT: v_add_nc_u32_e32 v2, 32, v2 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-NEXT: v_add_min_u32_e64 v2, v3, -1, v2 +; GFX1250-NEXT: v_add_min_u32 v2, v3, -1, v2 ; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v2, v[0:1] ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0 @@ -39487,8 +39487,8 @@ define <2 x bfloat> @v_sitofp_v2i64_to_v2bf16(<2 x i64> %x) { ; GFX1250-NEXT: v_dual_ashrrev_i32 v5, 31, v5 :: v_dual_ashrrev_i32 v4, 31, v4 ; GFX1250-NEXT: v_dual_add_nc_u32 v5, 32, v5 :: v_dual_add_nc_u32 v4, 32, v4 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_add_min_u32_e64 v5, v7, -1, v5 -; GFX1250-NEXT: 
v_add_min_u32_e64 v4, v6, -1, v4 +; GFX1250-NEXT: v_add_min_u32 v5, v7, -1, v5 +; GFX1250-NEXT: v_add_min_u32 v4, v6, -1, v4 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v5, v[0:1] ; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v4, v[2:3] @@ -39979,9 +39979,9 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) { ; GFX1250TRUE16-NEXT: v_dual_add_nc_u32 v7, 32, v7 :: v_dual_add_nc_u32 v6, 32, v6 ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250TRUE16-NEXT: v_ashrrev_i32_e32 v8, 31, v8 -; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v7, v10, -1, v7 +; GFX1250TRUE16-NEXT: v_add_min_u32 v7, v10, -1, v7 ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v6, v9, -1, v6 +; GFX1250TRUE16-NEXT: v_add_min_u32 v6, v9, -1, v6 ; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[2:3], v7, v[2:3] ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[4:5], v6, v[4:5] @@ -39991,7 +39991,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) { ; GFX1250TRUE16-NEXT: v_min_u32_e32 v4, 1, v4 ; GFX1250TRUE16-NEXT: v_or_b32_e32 v2, v3, v2 ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v8, v11, -1, v8 +; GFX1250TRUE16-NEXT: v_add_min_u32 v8, v11, -1, v8 ; GFX1250TRUE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v4, v5, v4 bitop3:0x54 ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2 @@ -40027,8 +40027,8 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) { ; GFX1250FAKE16-NEXT: v_dual_ashrrev_i32 v6, 31, v6 :: v_dual_ashrrev_i32 v7, 31, v7 ; GFX1250FAKE16-NEXT: v_dual_add_nc_u32 v6, 32, v6 :: v_dual_add_nc_u32 v7, 32, v7 ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v6, v10, -1, v6 -; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v7, v11, -1, v7 +; GFX1250FAKE16-NEXT: v_add_min_u32 v6, v10, -1, v6 +; GFX1250FAKE16-NEXT: v_add_min_u32 v7, v11, -1, v7 ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3] ; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1] @@ -40038,7 +40038,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) { ; GFX1250FAKE16-NEXT: v_min_u32_e32 v0, 1, v0 ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250FAKE16-NEXT: v_or_b32_e32 v2, v3, v2 -; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v8, v9, -1, v8 +; GFX1250FAKE16-NEXT: v_add_min_u32 v8, v9, -1, v8 ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54 ; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v2, v2 @@ -40656,18 +40656,18 @@ define <4 x bfloat> @v_sitofp_v4i64_to_v4bf16(<4 x i64> %x) { ; GFX1250-NEXT: v_dual_add_nc_u32 v9, 32, v9 :: v_dual_add_nc_u32 v8, 32, v8 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_dual_ashrrev_i32 v10, 31, v10 :: v_dual_bitop2_b32 v11, v0, v1 bitop3:0x14 -; GFX1250-NEXT: v_add_min_u32_e64 v9, v13, -1, 
v9 +; GFX1250-NEXT: v_add_min_u32 v9, v13, -1, v9 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_add_min_u32_e64 v8, v12, -1, v8 +; GFX1250-NEXT: v_add_min_u32 v8, v12, -1, v8 ; GFX1250-NEXT: v_dual_ashrrev_i32 v11, 31, v11 :: v_dual_add_nc_u32 v10, 32, v10 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250-NEXT: v_lshlrev_b64_e32 v[4:5], v9, v[4:5] ; GFX1250-NEXT: v_lshlrev_b64_e32 v[6:7], v8, v[6:7] ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1250-NEXT: v_add_nc_u32_e32 v11, 32, v11 -; GFX1250-NEXT: v_add_min_u32_e64 v10, v14, -1, v10 +; GFX1250-NEXT: v_add_min_u32 v10, v14, -1, v10 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_add_min_u32_e64 v11, v15, -1, v11 +; GFX1250-NEXT: v_add_min_u32 v11, v15, -1, v11 ; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v10, v[2:3] ; GFX1250-NEXT: v_min_u32_e32 v6, 1, v6 ; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4 diff --git a/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll b/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll index 13206ad..f45070c 100644 --- a/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mattr=+fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s -; RUN: llc -mtriple=amdgcn -mattr=-fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s +; RUN: llc -mtriple=amdgcn -mattr=+fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s +; RUN: llc -mtriple=amdgcn -mattr=-fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s -; RUN: llc -mtriple=amdgcn -mattr=+fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=ieee -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s -; RUN: llc -mtriple=amdgcn -mattr=-fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=ieee -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s +; RUN: llc -mtriple=amdgcn -mattr=+fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s +; RUN: llc -mtriple=amdgcn -mattr=-fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s ; FIXME: This should also fold when fma is actually fast if an FMA ; exists in the original program. 
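Review note: the RUN-line updates above drop -enable-unsafe-fp-math from the fadd/fma/fmul combine tests, and the retained FIXME records that the fold should eventually be keyed off an FMA present in the original program rather than a global mode. For context, the contraction these tests exercise is expressed per instruction with the contract flag; a minimal sketch with illustrative names, not taken from this patch:

; An fmul/fadd pair marked 'contract' may be fused into an fma
; without any global unsafe-fp-math switch.
define float @fmul_fadd_contract(float %a, float %b, float %c) {
  %mul = fmul contract float %a, %b
  %add = fadd contract float %mul, %c
  ret float %add
}
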
diff --git a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll index d41e2c6..8df7564 100644 --- a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -enable-unsafe-fp-math < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX89,VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX89,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX11-TRUE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX11-FAKE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX89,VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX89,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX11-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX11-FAKE16 %s define amdgpu_kernel void @fpext_f16_to_f32( ; SI-LABEL: fpext_f16_to_f32: diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll index a43292d..a043d53 100644 --- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s +; RUN: llc 
-amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck -check-prefixes=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s define amdgpu_kernel void @fptosi_f16_to_i16( diff --git a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll index 96cb621..af1ab37 100644 --- a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s define amdgpu_kernel void @fptoui_f16_to_i16( diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.ll index 5d31177..f1165491 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.ll @@ -2,14 +2,14 @@ ; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefixes=SI %s ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI-SDAG,VI-SAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI-GISEL,VI-SAFE-GISEL %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-SAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-SAFE-GISEL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck 
-check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-SAFE-GISEL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-TRUE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-FAKE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-FAKE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-FAKE16 %s diff --git a/llvm/test/CodeGen/AMDGPU/fract.f64.ll b/llvm/test/CodeGen/AMDGPU/fract.f64.ll index f09c1c6..cc2e78d 100644 --- a/llvm/test/CodeGen/AMDGPU/fract.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fract.f64.ll @@ -2,8 +2,8 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefixes=GCN,CI,FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCN,CI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -enable-unsafe-fp-math < %s | FileCheck --check-prefixes=GCN,SI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck --check-prefixes=GCN,CI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN,SI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCN,CI,FUNC %s declare double @llvm.fabs.f64(double) #0 declare double @llvm.floor.f64(double) #0 diff --git a/llvm/test/CodeGen/AMDGPU/fract.ll b/llvm/test/CodeGen/AMDGPU/fract.ll index 8ef0fcf..723fd93 100644 --- a/llvm/test/CodeGen/AMDGPU/fract.ll +++ b/llvm/test/CodeGen/AMDGPU/fract.ll @@ -1,8 +1,8 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck --check-prefix=GCN %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefix=GCN %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck --check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -enable-unsafe-fp-math < %s | FileCheck --check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck --check-prefix=GCN %s +; RUN: llc 
-amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck --check-prefix=GCN %s declare float @llvm.fabs.f32(float) #0 declare float @llvm.floor.f32(float) #0 diff --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll index 4ae0ba0..4e93eca 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-attr.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 -; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -O3 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=GCN,UNSAFE %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -O3 %s | FileCheck --check-prefixes=GCN,UNSAFE %s ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -O3 -enable-no-nans-fp-math %s | FileCheck --check-prefixes=GCN,NONANS %s ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck --check-prefixes=GCN,NOINFS %s diff --git a/llvm/test/CodeGen/AMDGPU/prevent-fmul-hoist-ir.ll b/llvm/test/CodeGen/AMDGPU/prevent-fmul-hoist-ir.ll index ef3e04c..6ce614b 100644 --- a/llvm/test/CodeGen/AMDGPU/prevent-fmul-hoist-ir.ll +++ b/llvm/test/CodeGen/AMDGPU/prevent-fmul-hoist-ir.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -passes='simplifycfg<hoist-common-insts>' -mtriple=amdgcn-- --fp-contract=fast -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX -check-prefix=FP-CONTRACT-FAST %s -; RUN: opt -S -passes='simplifycfg<hoist-common-insts>' -mtriple=amdgcn-- --fp-contract=off --enable-unsafe-fp-math -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX -check-prefix=UNSAFE-FP-MATH %s +; RUN: opt -S -passes='simplifycfg<hoist-common-insts>' -mtriple=amdgcn-- --fp-contract=off -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX -check-prefix=UNSAFE-FP-MATH %s ; RUN: opt -S -passes='simplifycfg<hoist-common-insts>' -mtriple=amdgcn-- --fp-contract=off -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX -check-prefix=NO-UNSAFE-FP-MATH %s define double @is_profitable_f64_contract(ptr dereferenceable(8) %ptr_x, ptr dereferenceable(8) %ptr_y, ptr dereferenceable(8) %ptr_a) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll index 09596e9..7ddd90e 100644 --- a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global 
< %s | FileCheck -check-prefixes=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s define amdgpu_kernel void @sitofp_i16_to_f16( ; SI-LABEL: sitofp_i16_to_f16: diff --git a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll index 9bcba6c..2d7ce10 100644 --- a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s define amdgpu_kernel void @uitofp_i16_to_f16( ; SI-LABEL: uitofp_i16_to_f16: diff --git a/llvm/test/CodeGen/ARM/llround-conv.ll b/llvm/test/CodeGen/ARM/llround-conv.ll index f734db8..20fe272 100644 --- a/llvm/test/CodeGen/ARM/llround-conv.ll +++ b/llvm/test/CodeGen/ARM/llround-conv.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT ; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 +; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8 ; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 define i64 @testmsxh_builtin(half %x) { @@ -22,6 +23,14 @@ define i64 @testmsxh_builtin(half %x) { ; CHECK-NOFP16-NEXT: bl llroundf ; CHECK-NOFP16-NEXT: pop {r11, pc} ; +; CHECK-FPv8-LABEL: testmsxh_builtin: +; CHECK-FPv8: @ %bb.0: @ %entry +; CHECK-FPv8-NEXT: .save {r11, lr} +; CHECK-FPv8-NEXT: push {r11, lr} +; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-FPv8-NEXT: bl llroundf +; CHECK-FPv8-NEXT: pop {r11, pc} +; ; CHECK-FP16-LABEL: testmsxh_builtin: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r11, lr} diff --git a/llvm/test/CodeGen/ARM/lround-conv.ll b/llvm/test/CodeGen/ARM/lround-conv.ll index 
03f7a0d..7466bcb 100644 --- a/llvm/test/CodeGen/ARM/lround-conv.ll +++ b/llvm/test/CodeGen/ARM/lround-conv.ll @@ -4,11 +4,39 @@ ; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8 ; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 -;define i32 @testmswh_builtin(half %x) { -;entry: -; %0 = tail call i32 @llvm.lround.i32.f16(half %x) -; ret i32 %0 -;} +define i32 @testmswh_builtin(half %x) { +; CHECK-SOFT-LABEL: testmswh_builtin: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: .save {r11, lr} +; CHECK-SOFT-NEXT: push {r11, lr} +; CHECK-SOFT-NEXT: bl __aeabi_h2f +; CHECK-SOFT-NEXT: pop {r11, lr} +; CHECK-SOFT-NEXT: b lroundf +; +; CHECK-NOFP16-LABEL: testmswh_builtin: +; CHECK-NOFP16: @ %bb.0: @ %entry +; CHECK-NOFP16-NEXT: .save {r11, lr} +; CHECK-NOFP16-NEXT: push {r11, lr} +; CHECK-NOFP16-NEXT: vmov r0, s0 +; CHECK-NOFP16-NEXT: bl __aeabi_h2f +; CHECK-NOFP16-NEXT: vmov s0, r0 +; CHECK-NOFP16-NEXT: pop {r11, lr} +; CHECK-NOFP16-NEXT: b lroundf +; +; CHECK-FPv8-LABEL: testmswh_builtin: +; CHECK-FPv8: @ %bb.0: @ %entry +; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-FPv8-NEXT: b lroundf +; +; CHECK-FP16-LABEL: testmswh_builtin: +; CHECK-FP16: @ %bb.0: @ %entry +; CHECK-FP16-NEXT: vcvta.s32.f16 s0, s0 +; CHECK-FP16-NEXT: vmov r0, s0 +; CHECK-FP16-NEXT: bx lr +entry: + %0 = tail call i32 @llvm.lround.i32.f16(half %x) + ret i32 %0 +} define i32 @testmsws_builtin(float %x) { ; CHECK-LABEL: testmsws_builtin: @@ -40,8 +68,3 @@ entry: ret i32 %0 } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK-FP16: {{.*}} -; CHECK-FPv8: {{.*}} -; CHECK-NOFP16: {{.*}} -; CHECK-SOFT: {{.*}} diff --git a/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll b/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll new file mode 100644 index 0000000..9190d03 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -dxil-prepare < %s | FileCheck %s + +; Ensures that dxil-prepare will remove the llvm.errno.tbaa metadata + +target triple = "dxil-unknown-shadermodel6.0-compute" + +define void @main() { +entry: + ret void +} + +; CHECK-NOT: !llvm.errno.tbaa +; CHECK-NOT: {{^!}} + +!llvm.errno.tbaa = !{!0} + +!0 = !{!1, !1, i64 0} +!1 = !{!"omnipotent char", !2} +!2 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll new file mode 100644 index 0000000..f7e5cdb --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll @@ -0,0 +1,33 @@ +; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s +; CHECK-DAG: r[[REGH:([0-9]+)]]:[[REGL:([0-9]+)]] = combine(##.LCPI0_0,#-1) +; CHECK-DAG: [[VREG1:v([0-9]+)]] = vmem(r[[REGH]]+#0) +; CHECK-DAG: [[REG1:(r[0-9]+)]] = memw(r{{[0-9]+}}+#4) +; CHECK-DAG: [[VREG2:v([0-9]+)]] = vsplat([[REG1]]) +; CHECK-DAG: [[REG2:(r[0-9]+)]] = memw(r{{[0-9]+}}+#0) +; CHECK-DAG: [[VREG3:v([0-9]+)]] = vsplat([[REG2]]) +; CHECK-DAG: [[VREG4:v([0-9]+)]] = vand([[VREG2]],[[VREG1]]) +; CHECK-DAG: [[VREG5:v([0-9]+)]] = vand([[VREG3]],[[VREG1]]) +; CHECK-DAG: [[QREG:q[0-9]+]] = vand([[VREG4]],r{{[0-9]+}}) +; CHECK-DAG: [[VREG6:v([0-9]+)]] = vand([[QREG]],r{{[0-9]+}}) +; CHECK-DAG: [[QREG1:q[0-9]+]] = vand([[VREG5]],r{{[0-9]+}}) +; CHECK-DAG: [[VREG7:v([0-9]+)]] = vand([[QREG1]],r{{[0-9]+}}) +; CHECK-DAG: v{{[0-9]+}}.b = 
vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h) +; CHECK-DAG: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h) +; CHECK-DAG: [[VREG8:v([0-9]+)]] = vror(v{{[0-9]+}},r{{[0-9]+}}) +; CHECK-DAG: [[VREG9:v([0-9]+)]] = vor([[VREG8]],v{{[0-9]+}}) +; CHECK-DAG: q{{[0-9]+}} = vand([[VREG9]],r{{[0-9]+}}) +define void @bitcast_i64_to_v64i1_full(ptr %in, ptr %out) { +entry: + %load = load i64, ptr %in, align 4 + %bitcast = bitcast i64 %load to <64 x i1> + %e0 = extractelement <64 x i1> %bitcast, i32 0 + %e1 = extractelement <64 x i1> %bitcast, i32 1 + %z0 = zext i1 %e0 to i8 + %z1 = zext i1 %e1 to i8 + %ptr0 = getelementptr i8, ptr %out, i32 0 + %ptr1 = getelementptr i8, ptr %out, i32 1 + store i8 %z0, ptr %ptr0, align 1 + store i8 %z1, ptr %ptr1, align 1 + ret void +} + diff --git a/llvm/test/CodeGen/Hexagon/swp-phi.ll b/llvm/test/CodeGen/Hexagon/swp-phi.ll index 9b0e126..6ce2481 100644 --- a/llvm/test/CodeGen/Hexagon/swp-phi.ll +++ b/llvm/test/CodeGen/Hexagon/swp-phi.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=hexagon -enable-unsafe-fp-math -enable-pipeliner \ +; RUN: llc -mtriple=hexagon -enable-pipeliner \ ; RUN: -pipeliner-prune-deps=false -stats -o /dev/null < %s ; REQUIRES: asserts diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json new file mode 100644 index 0000000..5de715b --- /dev/null +++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json @@ -0,0 +1,22 @@ +{ + "entities": { + "KILL": [0.1, 0.2, 0.3], + "MOV": [0.4, 0.5, 0.6], + "LEA": [0.7, 0.8, 0.9], + "RET": [1.0, 1.1, 1.2], + "ADD": [1.3, 1.4, 1.5], + "SUB": [1.6, 1.7, 1.8], + "IMUL": [1.9, 2.0, 2.1], + "AND": [2.2, 2.3, 2.4], + "OR": [2.5, 2.6, 2.7], + "XOR": [2.8, 2.9, 3.0], + "CMP": [3.1, 3.2, 3.3], + "TEST": [3.4, 3.5, 3.6], + "JMP": [3.7, 3.8, 3.9], + "CALL": [4.0, 4.1, 4.2], + "PUSH": [4.3, 4.4, 4.5], + "POP": [4.6, 4.7, 4.8], + "NOP": [4.9, 5.0, 5.1], + "COPY": [5.2, 5.3, 5.4] + } +}
\ No newline at end of file diff --git a/llvm/test/CodeGen/MIR2Vec/if-else.mir b/llvm/test/CodeGen/MIR2Vec/if-else.mir new file mode 100644 index 0000000..5734a23 --- /dev/null +++ b/llvm/test/CodeGen/MIR2Vec/if-else.mir @@ -0,0 +1,144 @@ +# REQUIRES: x86-registered-target +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=none -print-mir2vec -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_3D_vocab.json %s -o /dev/null 2>&1 | FileCheck %s + +--- | + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + + define dso_local i32 @abc(i32 noundef %a, i32 noundef %b) { + entry: + %retval = alloca i32, align 4 + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + store i32 %a, ptr %a.addr, align 4 + store i32 %b, ptr %b.addr, align 4 + %0 = load i32, ptr %a.addr, align 4 + %1 = load i32, ptr %b.addr, align 4 + %cmp = icmp sgt i32 %0, %1 + br i1 %cmp, label %if.then, label %if.else + + if.then: ; preds = %entry + %2 = load i32, ptr %b.addr, align 4 + store i32 %2, ptr %retval, align 4 + br label %return + + if.else: ; preds = %entry + %3 = load i32, ptr %a.addr, align 4 + store i32 %3, ptr %retval, align 4 + br label %return + + return: ; preds = %if.else, %if.then + %4 = load i32, ptr %retval, align 4 + ret i32 %4 + } +... +--- +name: abc +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHContTarget: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: true +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: gr32, preferred-register: '', flags: [ ] } + - { id: 1, class: gr32, preferred-register: '', flags: [ ] } + - { id: 2, class: gr32, preferred-register: '', flags: [ ] } + - { id: 3, class: gr32, preferred-register: '', flags: [ ] } + - { id: 4, class: gr32, preferred-register: '', flags: [ ] } + - { id: 5, class: gr32, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$edi', virtual-reg: '%0' } + - { reg: '$esi', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 +fixedStack: [] +stack: + - { id: 0, name: retval, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: a.addr, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: b.addr, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + amxProgModel: None +body: 
| + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $edi, $esi + + %1:gr32 = COPY $esi + %0:gr32 = COPY $edi + MOV32mr %stack.1.a.addr, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.a.addr) + MOV32mr %stack.2.b.addr, 1, $noreg, 0, $noreg, %1 :: (store (s32) into %ir.b.addr) + %2:gr32 = SUB32rr %0, %1, implicit-def $eflags + JCC_1 %bb.2, 14, implicit $eflags + JMP_1 %bb.1 + + bb.1.if.then: + successors: %bb.3(0x80000000) + + %4:gr32 = MOV32rm %stack.2.b.addr, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.b.addr) + MOV32mr %stack.0.retval, 1, $noreg, 0, $noreg, killed %4 :: (store (s32) into %ir.retval) + JMP_1 %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + + %3:gr32 = MOV32rm %stack.1.a.addr, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.a.addr) + MOV32mr %stack.0.retval, 1, $noreg, 0, $noreg, killed %3 :: (store (s32) into %ir.retval) + + bb.3.return: + %5:gr32 = MOV32rm %stack.0.retval, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.retval) + $eax = COPY %5 + RET 0, $eax +... + +# CHECK: Machine basic block vectors: +# CHECK-NEXT: Machine basic block: abc:entry: +# CHECK-NEXT: [ 16.50 17.10 17.70 ] +# CHECK-NEXT: Machine basic block: abc:if.then: +# CHECK-NEXT: [ 4.50 4.80 5.10 ] +# CHECK-NEXT: Machine basic block: abc:if.else: +# CHECK-NEXT: [ 0.80 1.00 1.20 ] +# CHECK-NEXT: Machine basic block: abc:return: +# CHECK-NEXT: [ 6.60 6.90 7.20 ]
\ No newline at end of file diff --git a/llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir b/llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir new file mode 100644 index 0000000..338cb63 --- /dev/null +++ b/llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir @@ -0,0 +1,76 @@ +# REQUIRES: x86-registered-target +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=none -print-mir2vec -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_3D_vocab.json %s -o /dev/null 2>&1 | FileCheck %s + +--- | + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + + define dso_local noundef i32 @add_function(i32 noundef %a, i32 noundef %b) { + entry: + %sum = add nsw i32 %a, %b + %result = mul nsw i32 %sum, 2 + ret i32 %result + } + + define dso_local void @simple_function() { + entry: + ret void + } +... +--- +name: add_function +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr32 } + - { id: 1, class: gr32 } + - { id: 2, class: gr32 } + - { id: 3, class: gr32 } +liveins: + - { reg: '$edi', virtual-reg: '%0' } + - { reg: '$esi', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $edi, $esi + + %1:gr32 = COPY $esi + %0:gr32 = COPY $edi + %2:gr32 = nsw ADD32rr %0, %1, implicit-def dead $eflags + %3:gr32 = ADD32rr %2, %2, implicit-def dead $eflags + $eax = COPY %3 + RET 0, $eax + +--- +name: simple_function +alignment: 16 +tracksRegLiveness: true +body: | + bb.0.entry: + RET 0 + +# CHECK: MIR2Vec embeddings for machine function add_function: +# CHECK: Function vector: [ 19.20 19.80 20.40 ] +# CHECK-NEXT: Machine basic block vectors: +# CHECK-NEXT: Machine basic block: add_function:entry: +# CHECK-NEXT: [ 19.20 19.80 20.40 ] +# CHECK-NEXT: Machine instruction vectors: +# CHECK-NEXT: Machine instruction: %1:gr32 = COPY $esi +# CHECK-NEXT: [ 5.20 5.30 5.40 ] +# CHECK-NEXT: Machine instruction: %0:gr32 = COPY $edi +# CHECK-NEXT: [ 5.20 5.30 5.40 ] +# CHECK-NEXT: Machine instruction: %2:gr32 = nsw ADD32rr %0:gr32(tied-def 0), %1:gr32, implicit-def dead $eflags +# CHECK-NEXT: [ 1.30 1.40 1.50 ] +# CHECK-NEXT: Machine instruction: %3:gr32 = ADD32rr %2:gr32(tied-def 0), %2:gr32, implicit-def dead $eflags +# CHECK-NEXT: [ 1.30 1.40 1.50 ] +# CHECK-NEXT: Machine instruction: $eax = COPY %3:gr32 +# CHECK-NEXT: [ 5.20 5.30 5.40 ] +# CHECK-NEXT: Machine instruction: RET 0, $eax +# CHECK-NEXT: [ 1.00 1.10 1.20 ] + +# CHECK: MIR2Vec embeddings for machine function simple_function: +# CHECK-NEXT:Function vector: [ 1.00 1.10 1.20 ] +# CHECK-NEXT: Machine basic block vectors: +# CHECK-NEXT: Machine basic block: simple_function:entry: +# CHECK-NEXT: [ 1.00 1.10 1.20 ] +# CHECK-NEXT: Machine instruction vectors: +# CHECK-NEXT: Machine instruction: RET 0 +# CHECK-NEXT: [ 1.00 1.10 1.20 ]
\ No newline at end of file diff --git a/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll index 80b4048..c6554bc 100644 --- a/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll +++ b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll @@ -1,8 +1,8 @@ -; REQUIRES: x86_64-linux -; RUN: llc -o /dev/null -print-mir2vec-vocab %s 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID -; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_zero_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO-DIM -; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_invalid_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ENTITIES -; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_inconsistent_dims.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-INCONSISTENT-DIMS +; REQUIRES: x86-registered-target +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab %s 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_zero_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO-DIM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_invalid_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ENTITIES +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_inconsistent_dims.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-INCONSISTENT-DIMS define dso_local void @test() { entry: diff --git a/llvm/test/CodeGen/NVPTX/fma-assoc.ll b/llvm/test/CodeGen/NVPTX/fma-assoc.ll index 6693c90..db0eae7 100644 --- a/llvm/test/CodeGen/NVPTX/fma-assoc.ll +++ b/llvm/test/CodeGen/NVPTX/fma-assoc.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s -check-prefix=CHECK -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNSAFE +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNSAFE +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | %ptxas-verify %} define ptx_device float @t1_f32(float %x, float %y, float %z, ; CHECK-UNSAFE-LABEL: t1_f32( diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index cad684e..baa127e 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -2,8 +2,8 @@ ; REQUIRES: asserts ; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG ; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF -; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -fp-contract=fast -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG -; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -fp-contract=fast -enable-no-nans-fp-math 
-enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL +; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -fp-contract=fast -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG +; RUN: llc < %s -mtriple=powerpc64le -fp-contract=fast -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL ; Test FP transforms using instruction/node-level fast-math-flags. ; We're also checking debug output to verify that FMF is propagated to the newly created nodes. diff --git a/llvm/test/CodeGen/PowerPC/scalar-equal.ll b/llvm/test/CodeGen/PowerPC/scalar-equal.ll index 1832475..c0b11b4 100644 --- a/llvm/test/CodeGen/PowerPC/scalar-equal.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-equal.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names \ ; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ ; RUN: --enable-no-nans-fp-math --enable-no-infs-fp-math \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ ; RUN: --check-prefix=FAST-P8 -; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names \ ; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ ; RUN: --enable-no-nans-fp-math --enable-no-infs-fp-math \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ diff --git a/llvm/test/CodeGen/PowerPC/scalar-min-max-p10.ll b/llvm/test/CodeGen/PowerPC/scalar-min-max-p10.ll index ca9baceb..5915bd3 100644 --- a/llvm/test/CodeGen/PowerPC/scalar-min-max-p10.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-min-max-p10.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ +; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ ; RUN: --enable-no-nans-fp-math \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s diff --git a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll index fd0b494..881d1f4 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names \ ; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ ; RUN: --enable-no-nans-fp-math --enable-no-infs-fp-math \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ ; RUN: --check-prefix=FAST-P8 -; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names \ ; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ ; RUN: --enable-no-nans-fp-math --enable-no-infs-fp-math \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ diff --git a/llvm/test/CodeGen/RISCV/atomic-fence.ll b/llvm/test/CodeGen/RISCV/atomic-fence.ll index 7103345..77148f6 100644 --- a/llvm/test/CodeGen/RISCV/atomic-fence.ll +++ b/llvm/test/CodeGen/RISCV/atomic-fence.ll @@ -1,12 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,WMO %s +; RUN: llc -mtriple=riscv32 
-mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,WMO %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,WMO %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,WMO %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,WMO %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,WMO %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ diff --git a/llvm/test/CodeGen/RISCV/atomic-load-store.ll b/llvm/test/CodeGen/RISCV/atomic-load-store.ll index 7e3abc7..c6234de 100644 --- a/llvm/test/CodeGen/RISCV/atomic-load-store.ll +++ b/llvm/test/CodeGen/RISCV/atomic-load-store.ll @@ -1,12 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I-ZALRSC %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-ZALRSC %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ @@ -44,6 +48,11 @@ define i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i8_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lb a0, 0(a0) @@ -59,6 +68,11 @@ define i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i8_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i8_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lb a0, 0(a0) @@ -78,6 +92,11 @@ define i8 @atomic_load_i8_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lb a0, 0(a0) @@ -93,6 +112,11 @@ define i8 @atomic_load_i8_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lb a0, 0(a0) @@ -112,6 +136,12 @@ define i8 @atomic_load_i8_acquire(ptr %a) 
nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i8_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lb a0, 0(a0) @@ -133,6 +163,12 @@ define i8 @atomic_load_i8_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i8_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lb a0, 0(a0) @@ -200,6 +236,13 @@ define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, rw @@ -223,6 +266,13 @@ define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw @@ -286,6 +336,11 @@ define i16 @atomic_load_i16_unordered(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i16_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lh a0, 0(a0) @@ -301,6 +356,11 @@ define i16 @atomic_load_i16_unordered(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i16_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i16_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lh a0, 0(a0) @@ -320,6 +380,11 @@ define i16 @atomic_load_i16_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i16_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lh a0, 0(a0) @@ -335,6 +400,11 @@ define i16 @atomic_load_i16_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i16_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lh a0, 0(a0) @@ -354,6 +424,12 @@ define i16 @atomic_load_i16_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i16_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lh a0, 0(a0) @@ -375,6 +451,12 @@ define i16 @atomic_load_i16_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; 
RV64I-ZALRSC-LABEL: atomic_load_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i16_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lh a0, 0(a0) @@ -442,6 +524,13 @@ define i16 @atomic_load_i16_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, rw @@ -465,6 +554,13 @@ define i16 @atomic_load_i16_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw @@ -528,6 +624,11 @@ define i32 @atomic_load_i32_unordered(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i32_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lw a0, 0(a0) @@ -543,6 +644,11 @@ define i32 @atomic_load_i32_unordered(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i32_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i32_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lw a0, 0(a0) @@ -562,6 +668,11 @@ define i32 @atomic_load_i32_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lw a0, 0(a0) @@ -577,6 +688,11 @@ define i32 @atomic_load_i32_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lw a0, 0(a0) @@ -596,6 +712,12 @@ define i32 @atomic_load_i32_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lw a0, 0(a0) @@ -617,6 +739,12 @@ define i32 @atomic_load_i32_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lw a0, 0(a0) @@ -684,6 +812,13 @@ define i32 @atomic_load_i32_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_seq_cst: +; RV32I-ZALRSC: 
# %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, rw @@ -707,6 +842,13 @@ define i32 @atomic_load_i32_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw @@ -770,6 +912,16 @@ define i64 @atomic_load_i64_unordered(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i64_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 0 +; RV32I-ZALRSC-NEXT: call __atomic_load_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i64_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -790,6 +942,11 @@ define i64 @atomic_load_i64_unordered(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i64_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: ld a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i64_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: ld a0, 0(a0) @@ -809,6 +966,16 @@ define i64 @atomic_load_i64_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 0 +; RV32I-ZALRSC-NEXT: call __atomic_load_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -829,6 +996,11 @@ define i64 @atomic_load_i64_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: ld a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: ld a0, 0(a0) @@ -848,6 +1020,16 @@ define i64 @atomic_load_i64_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 2 +; RV32I-ZALRSC-NEXT: call __atomic_load_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -868,6 +1050,12 @@ define i64 @atomic_load_i64_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: ld a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i64_acquire: ; RV64IA-WMO: # %bb.0: ; 
RV64IA-WMO-NEXT: ld a0, 0(a0) @@ -914,6 +1102,16 @@ define i64 @atomic_load_i64_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 5 +; RV32I-ZALRSC-NEXT: call __atomic_load_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -934,6 +1132,13 @@ define i64 @atomic_load_i64_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ld a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw @@ -979,6 +1184,11 @@ define void @atomic_store_i8_unordered(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i8_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sb a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i8_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sb a1, 0(a0) @@ -994,6 +1204,11 @@ define void @atomic_store_i8_unordered(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i8_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sb a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i8_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sb a1, 0(a0) @@ -1013,6 +1228,11 @@ define void @atomic_store_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sb a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sb a1, 0(a0) @@ -1028,6 +1248,11 @@ define void @atomic_store_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sb a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sb a1, 0(a0) @@ -1047,6 +1272,12 @@ define void @atomic_store_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sb a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i8_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1068,6 +1299,12 @@ define void @atomic_store_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sb a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i8_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1135,6 +1372,13 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; 
RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sb a1, 0(a0) +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i8_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1157,6 +1401,13 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sb a1, 0(a0) +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i8_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1219,6 +1470,11 @@ define void @atomic_store_i16_unordered(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i16_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sh a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i16_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sh a1, 0(a0) @@ -1234,6 +1490,11 @@ define void @atomic_store_i16_unordered(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i16_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sh a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i16_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sh a1, 0(a0) @@ -1253,6 +1514,11 @@ define void @atomic_store_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sh a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i16_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sh a1, 0(a0) @@ -1268,6 +1534,11 @@ define void @atomic_store_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sh a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i16_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sh a1, 0(a0) @@ -1287,6 +1558,12 @@ define void @atomic_store_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sh a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i16_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1308,6 +1585,12 @@ define void @atomic_store_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sh a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i16_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1375,6 +1658,13 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sh a1, 0(a0) +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i16_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1397,6 +1687,13 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; 
RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sh a1, 0(a0) +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i16_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1459,6 +1756,11 @@ define void @atomic_store_i32_unordered(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i32_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sw a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i32_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sw a1, 0(a0) @@ -1474,6 +1776,11 @@ define void @atomic_store_i32_unordered(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i32_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sw a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i32_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sw a1, 0(a0) @@ -1493,6 +1800,11 @@ define void @atomic_store_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sw a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sw a1, 0(a0) @@ -1508,6 +1820,11 @@ define void @atomic_store_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sw a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sw a1, 0(a0) @@ -1527,6 +1844,12 @@ define void @atomic_store_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sw a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1548,6 +1871,12 @@ define void @atomic_store_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sw a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1615,6 +1944,13 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sw a1, 0(a0) +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1637,6 +1973,13 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sw a1, 0(a0) +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1699,6 +2042,16 @@ 
define void @atomic_store_i64_unordered(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i64_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_store_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i64_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -1719,6 +2072,11 @@ define void @atomic_store_i64_unordered(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i64_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sd a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i64_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sd a1, 0(a0) @@ -1738,6 +2096,16 @@ define void @atomic_store_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_store_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -1758,6 +2126,11 @@ define void @atomic_store_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sd a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sd a1, 0(a0) @@ -1777,6 +2150,16 @@ define void @atomic_store_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_store_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -1797,6 +2180,12 @@ define void @atomic_store_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sd a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1843,6 +2232,16 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_store_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, 
sp, -16 @@ -1863,6 +2262,13 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sd a1, 0(a0) +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll index 4dafd6a..d5238ab 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll @@ -3,10 +3,14 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA %s +; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32I-ZALRSC %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64I-ZALRSC %s define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind { ; RV32I-LABEL: atomicrmw_sub_i32_constant: @@ -26,6 +30,18 @@ define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind { ; RV32IA-NEXT: amoadd.w.aqrl a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_constant: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a1, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB0_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i32_constant: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -42,6 +58,18 @@ define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind { ; RV64IA-NEXT: li a1, -1 ; RV64IA-NEXT: amoadd.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_constant: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB0_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i32 1 seq_cst ret i32 %1 } @@ -71,6 +99,18 @@ define i64 @atomicrmw_sub_i64_constant(ptr %a) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_constant: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 1 +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: li a2, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i64_constant: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -87,6 +127,18 @@ define i64 @atomicrmw_sub_i64_constant(ptr %a) nounwind { ; RV64IA-NEXT: li a1, -1 ; RV64IA-NEXT: amoadd.d.aqrl a0, a1, (a0) ; RV64IA-NEXT: ret +; +; 
RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_constant: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB1_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i64 1 seq_cst ret i64 %1 } @@ -109,6 +161,18 @@ define i32 @atomicrmw_sub_i32_neg(ptr %a, i32 %x, i32 %y) nounwind { ; RV32IA-NEXT: amoadd.w.aqrl a0, a2, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_neg: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sub a2, a1, a2 +; RV32I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a1, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB2_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i32_neg: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -125,6 +189,18 @@ define i32 @atomicrmw_sub_i32_neg(ptr %a, i32 %x, i32 %y) nounwind { ; RV64IA-NEXT: sub a2, a2, a1 ; RV64IA-NEXT: amoadd.w.aqrl a0, a2, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_neg: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: subw a2, a1, a2 +; RV64I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB2_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %b = sub i32 %x, %y %1 = atomicrmw sub ptr %a, i32 %b seq_cst ret i32 %1 @@ -159,6 +235,20 @@ define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_neg: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sltu a5, a1, a3 +; RV32I-ZALRSC-NEXT: sub a2, a2, a4 +; RV32I-ZALRSC-NEXT: sub a2, a2, a5 +; RV32I-ZALRSC-NEXT: sub a1, a1, a3 +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i64_neg: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -175,6 +265,18 @@ define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind { ; RV64IA-NEXT: sub a2, a2, a1 ; RV64IA-NEXT: amoadd.d.aqrl a0, a2, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_neg: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sub a2, a1, a2 +; RV64I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB3_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %b = sub i64 %x, %y %1 = atomicrmw sub ptr %a, i64 %b seq_cst ret i64 %1 diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 1213256..26feb83 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -1,12 +1,16 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32I-ZALRSC %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS,RV32IA-WMO,RV32IA-WMO-NOZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS,RV32IA-TSO,RV32IA-TSO-NOZACAS %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64I-ZALRSC %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS,RV64IA-WMO,RV64IA-WMO-NOZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ @@ -50,6 +54,26 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB0_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -80,6 +104,26 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB0_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -174,6 +218,26 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; 
RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB1_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -224,6 +288,26 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB1_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -378,6 +462,26 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB2_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -428,6 +532,26 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB2_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -582,6 +706,26 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_acq_rel: +; 
RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB3_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -632,6 +776,26 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB3_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -786,6 +950,26 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB4_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -816,6 +1000,26 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB4_1 +; RV64I-ZALRSC-NEXT: # 
%bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -914,6 +1118,22 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB5_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -936,6 +1156,22 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB5_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -1004,6 +1240,22 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB6_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1037,6 +1289,22 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB6_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1138,6 +1406,22 @@ define 
i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: not a2, a2
+; RV32I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV32I-ZALRSC-NEXT: and a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB7_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1171,6 +1455,22 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: not a2, a2
+; RV64I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV64I-ZALRSC-NEXT: and a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB7_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1272,6 +1572,22 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: not a2, a2
+; RV32I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV32I-ZALRSC-NEXT: and a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB8_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1305,6 +1621,22 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: not a2, a2
+; RV64I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV64I-ZALRSC-NEXT: and a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB8_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1406,6 +1738,22 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: not a2, a2
+; RV32I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV32I-ZALRSC-NEXT: and a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB9_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1439,6 +1787,22 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: not a2, a2
+; RV64I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV64I-ZALRSC-NEXT: and a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB9_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1540,6 +1904,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB10_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a1, a0, -4
@@ -1561,6 +1940,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB10_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a1, a0, -4
@@ -1630,6 +2024,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB11_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1661,6 +2070,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB11_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1760,6 +2184,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB12_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1791,6 +2230,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB12_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1890,6 +2344,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB13_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1921,6 +2390,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB13_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -2020,6 +2504,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB14_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -2051,6 +2550,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB14_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -2149,6 +2663,26 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB15_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -2179,6 +2713,26 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB15_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_add_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -2273,6 +2827,26 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB16_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -2323,6 +2897,26 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB16_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -2477,6 +3071,26 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB17_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -2527,6 +3141,26 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB17_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -2681,6 +3315,26 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB18_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -2731,6 +3385,26 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB18_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -2885,6 +3559,26 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB19_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -2915,6 +3609,26 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB19_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_add_i8_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3009,6 +3723,26 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB20_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3039,6 +3773,26 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB20_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3137,6 +3891,26 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB21_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -3187,6 +3961,26 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB21_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -3345,6 +4139,26 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB22_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -3395,6 +4209,26 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB22_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -3553,6 +4387,26 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB23_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -3603,6 +4457,26 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB23_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -3761,6 +4635,26 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB24_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3791,6 +4685,26 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB24_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i8_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3889,6 +4803,25 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB25_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_and_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3913,6 +4846,25 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB25_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_and_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3989,6 +4941,25 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB26_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4027,6 +4998,25 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB26_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4145,6 +5135,25 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB27_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4183,6 +5192,25 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB27_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4301,6 +5329,25 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB28_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4339,6 +5386,25 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB28_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4457,6 +5523,25 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB29_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4495,6 +5580,25 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB29_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4613,6 +5717,27 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a4, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB30_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -4644,6 +5769,27 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a4, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB30_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -4865,6 +6011,27 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a4, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB31_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4917,6 +6084,27 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a4, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB31_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -5201,6 +6389,27 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a4, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB32_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -5253,6 +6462,27 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a4, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB32_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -5537,6 +6767,27 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a4, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB33_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -5589,6 +6840,27 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a4, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB33_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -5873,6 +7145,27 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a4, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB34_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -5904,6 +7197,27 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a4, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB34_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -6129,6 +7443,21 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB35_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_or_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -6149,6 +7478,21 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB35_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_or_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -6213,6 +7557,21 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB36_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6243,6 +7602,21 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB36_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6337,6 +7711,21 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB37_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6367,6 +7756,21 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB37_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6461,6 +7865,21 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB38_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6491,6 +7910,21 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB38_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6585,6 +8019,21 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB39_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6615,6 +8064,21 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB39_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6709,6 +8173,21 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB40_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -6729,6 +8208,21 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB40_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_xor_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -6793,6 +8287,21 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB41_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6823,6 +8332,21 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB41_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6917,6 +8441,21 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB42_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6947,6 +8486,21 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB42_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7041,6 +8595,21 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB43_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7071,6 +8640,21 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB43_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7165,6 +8749,21 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB44_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7195,6 +8794,21 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB44_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7321,6 +8935,35 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB45_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB45_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -7392,6 +9035,35 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB45_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB45_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_max_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -7545,6 +9217,35 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB46_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB46_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7645,6 +9346,35 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB46_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB46_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7885,6 +9615,35 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+;
RV32I-ZALRSC-LABEL: atomicrmw_max_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB47_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB47_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7985,6 +9744,35 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB47_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB47_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -8225,6 +10013,35 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB48_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV32I-ZALRSC-NEXT: 
xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB48_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -8325,6 +10142,35 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB48_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB48_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -8565,6 +10411,35 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB49_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB49_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -8636,6 +10511,35 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; 
RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB49_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB49_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_max_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -8789,6 +10693,35 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB50_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB50_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -8860,6 +10793,35 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB50_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; 
RV64I-ZALRSC-NEXT: bnez a6, .LBB50_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -9013,6 +10975,35 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB51_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB51_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9113,6 +11104,35 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB51_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB51_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9353,6 +11373,35 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: 
xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB52_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB52_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9453,6 +11502,35 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB52_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB52_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9693,6 +11771,35 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB53_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB53_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel: ; 
RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9793,6 +11900,35 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB53_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB53_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -10033,6 +12169,35 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB54_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB54_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -10104,6 +12269,35 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: 
mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB54_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB54_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -10255,6 +12449,30 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB55_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB55_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -10319,6 +12537,30 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB55_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB55_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -10455,6 +12697,30 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 +; 
RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB56_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB56_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -10543,6 +12809,30 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB56_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB56_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -10751,6 +13041,30 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB57_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB57_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -10839,6 +13153,30 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: 
.LBB57_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB57_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB57_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -11047,6 +13385,30 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB58_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB58_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -11135,6 +13497,30 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB58_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB58_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -11343,6 +13729,30 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; 
RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB59_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB59_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -11407,6 +13817,30 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB59_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB59_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -11543,6 +13977,30 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB60_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB60_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -11607,6 +14065,30 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; 
RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB60_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB60_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -11743,6 +14225,30 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB61_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB61_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -11831,6 +14337,30 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB61_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB61_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12039,6 +14569,30 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; 
RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB62_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB62_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12127,6 +14681,30 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB62_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB62_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12335,6 +14913,30 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB63_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB63_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12423,6 +15025,30 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli 
a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB63_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB63_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12631,6 +15257,30 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB64_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB64_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -12695,6 +15345,30 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB64_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB64_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -12801,6 +15475,27 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: 
andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB65_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -12832,6 +15527,27 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB65_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -12929,6 +15645,27 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB66_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12981,6 +15718,27 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor 
a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB66_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13141,6 +15899,27 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB67_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13193,6 +15972,27 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB67_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13353,6 +16153,27 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB68_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13405,6 +16226,27 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; 
RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB68_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13565,6 +16407,27 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB69_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -13596,6 +16459,27 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB69_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -13697,6 +16581,23 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; 
RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB70_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -13720,6 +16621,23 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB70_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -13791,6 +16709,23 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB71_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB71_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -13826,6 +16761,23 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB71_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB71_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -13933,6 +16885,23 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) 
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB72_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -13968,6 +16937,23 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB72_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14075,6 +17061,23 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB73_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14110,6 +17113,23 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB73_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14217,6 +17237,23 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; 
RV32I-ZALRSC-NEXT: bnez a4, .LBB74_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14252,6 +17289,23 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB74_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14360,6 +17414,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB75_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -14383,6 +17453,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB75_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -14456,6 +17542,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB76_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; 
RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14490,6 +17592,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB76_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14596,6 +17714,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB77_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14630,6 +17764,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB77_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14736,6 +17886,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB78_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; 
RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14770,6 +17936,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB78_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14876,6 +18058,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB79_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14910,6 +18108,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB79_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -15014,6 +18228,27 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: add a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, 
.LBB80_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -15045,6 +18280,27 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: add a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB80_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_add_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -15142,6 +18398,27 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: add a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB81_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -15194,6 +18471,27 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: add a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB81_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -15354,6 +18652,27 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_release: +; RV32I-ZALRSC: # %bb.0: +; 
RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: add a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB82_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -15406,6 +18725,27 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: add a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB82_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -15566,6 +18906,27 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: add a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB83_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -15618,6 +18979,27 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: add a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; 
RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB83_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -15778,6 +19160,27 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: add a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB84_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -15809,6 +19212,27 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: add a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB84_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_add_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -15906,6 +19330,27 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB85_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -15937,6 +19382,27 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { 
; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB85_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -16038,6 +19504,27 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB86_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16090,6 +19577,27 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB86_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16254,6 +19762,27 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 +; 
RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB87_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16306,6 +19835,27 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB87_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16470,6 +20020,27 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB88_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16522,6 +20093,27 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB88_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # 
%bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16686,6 +20278,27 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB89_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -16717,6 +20330,27 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB89_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -16818,6 +20452,26 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: not a3, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB90_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_and_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -16843,6 +20497,26 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: 
not a3, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB90_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_and_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -16922,6 +20596,26 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: not a3, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB91_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16962,6 +20656,26 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: not a3, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB91_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17086,6 +20800,26 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: not a3, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB92_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17126,6 +20860,26 @@ define i16 
@atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: not a3, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB92_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17250,6 +21004,26 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: not a3, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB93_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17290,6 +21064,26 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: not a3, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB93_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17414,6 +21208,26 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: not a3, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB94_1: # =>This Inner Loop Header: 
Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB94_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17454,6 +21268,26 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: not a3, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB94_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17578,6 +21412,28 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB95_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -17610,6 +21466,28 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB95_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -17838,6 +21716,28 @@ define 
i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB96_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17892,6 +21792,28 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB96_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -18186,6 +22108,28 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB97_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -18240,6 +22184,28 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; 
RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB97_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -18534,6 +22500,28 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB98_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -18588,6 +22576,28 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB98_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -18882,6 +22892,28 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; 
RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB99_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -18914,6 +22946,28 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB99_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -19146,6 +23200,22 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB100_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_or_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -19167,6 +23237,22 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB100_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_or_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -19234,6 +23320,22 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: 
.LBB101_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB101_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19266,6 +23368,22 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB101_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19366,6 +23484,22 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB102_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19398,6 +23532,22 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB102_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19498,6 +23648,22 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; 
RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB103_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19530,6 +23696,22 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB103_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19630,6 +23812,22 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB104_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19662,6 +23860,22 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB104_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19762,6 +23976,22 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB105_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; 
RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -19783,6 +24013,22 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB105_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xor_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -19850,6 +24096,22 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB106_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19882,6 +24144,22 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB106_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19982,6 +24260,22 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB107_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_release: ; RV32IA-WMO-NOZACAS: # 
%bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20014,6 +24308,22 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB107_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20114,6 +24424,22 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB108_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20146,6 +24472,22 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB108_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20246,6 +24588,22 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB109_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20278,6 +24636,22 @@ define i16 
@atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB109_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20410,6 +24784,37 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB110_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB110_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -20483,6 +24888,37 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB110_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB110_1 +; RV64I-ZALRSC-NEXT: # %bb.4: 
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_max_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -20642,6 +25078,37 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB111_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB111_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20746,6 +25213,37 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB111_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB111_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20998,6 +25496,37 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; 
RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB112_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB112_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -21102,6 +25631,37 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB112_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB112_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -21354,6 +25914,37 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB113_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; 
RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB113_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -21458,6 +26049,37 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB113_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB113_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -21710,6 +26332,37 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB114_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB114_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -21783,6 +26436,37 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: 
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB114_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB114_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_max_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -21942,6 +26626,37 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB115_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB115_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -22015,6 +26730,37 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; 
RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB115_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB115_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -22174,6 +26920,37 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB116_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB116_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22278,6 +27055,37 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB116_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB116_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ 
-22530,6 +27338,37 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB117_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB117_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22634,6 +27473,37 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB117_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB117_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22886,6 +27756,37 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; 
RV32I-ZALRSC-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB118_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB118_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22990,6 +27891,37 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB118_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB118_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -23242,6 +28174,37 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB119_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, 
.LBB119_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -23315,6 +28278,37 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB119_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB119_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -23476,6 +28470,31 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB120_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB120_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -23545,6 +28564,31 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; 
RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB120_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB120_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -23688,6 +28732,31 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB121_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB121_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -23782,6 +28851,31 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB121_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB121_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24000,6 +29094,31 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB122_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB122_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24094,6 +29213,31 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB122_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB122_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24312,6 +29456,31 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB123_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB123_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24406,6 +29575,31 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: 
andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB123_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB123_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24624,6 +29818,31 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB124_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB124_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -24693,6 +29912,31 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB124_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB124_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -24836,6 +30080,31 @@ define i16 
@atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB125_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB125_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -24905,6 +30174,31 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB125_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB125_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -25048,6 +30342,31 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB126_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB126_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; 
RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25142,6 +30461,31 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB126_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB126_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25360,6 +30704,31 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB127_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB127_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25454,6 +30823,31 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB127_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: 
.LBB127_3: # in Loop: Header=BB127_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB127_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25672,6 +31066,31 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB128_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB128_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25766,6 +31185,31 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB128_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB128_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25984,6 +31428,31 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, 
.LBB129_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB129_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -26053,6 +31522,31 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB129_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB129_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -26162,6 +31656,17 @@ define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB130_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoswap.w a0, a1, (a0) @@ -26177,6 +31682,17 @@ define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB130_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_xchg_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoswap.w a0, a1, (a0) @@ -26196,6 +31712,17 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB131_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB131_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; 
RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoswap.w.aq a0, a1, (a0) @@ -26216,6 +31743,17 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB131_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB131_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.w.aq a0, a1, (a0) @@ -26240,6 +31778,17 @@ define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB132_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoswap.w.rl a0, a1, (a0) @@ -26260,6 +31809,17 @@ define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB132_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.w.rl a0, a1, (a0) @@ -26284,6 +31844,17 @@ define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB133_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0) @@ -26304,6 +31875,17 @@ define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB133_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0) @@ -26328,6 +31910,17 @@ define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: 
ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB134_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB134_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0) @@ -26348,6 +31941,17 @@ define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB134_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB134_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0) @@ -26372,6 +31976,17 @@ define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB135_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB135_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoadd.w a0, a1, (a0) @@ -26387,6 +32002,17 @@ define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB135_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB135_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_add_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoadd.w a0, a1, (a0) @@ -26406,6 +32032,17 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB136_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_add_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoadd.w.aq a0, a1, (a0) @@ -26426,6 +32063,17 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB136_1 +; 
RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.w.aq a0, a1, (a0) @@ -26450,6 +32098,17 @@ define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB137_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_add_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoadd.w.rl a0, a1, (a0) @@ -26470,6 +32129,17 @@ define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB137_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.w.rl a0, a1, (a0) @@ -26494,6 +32164,17 @@ define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB138_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_add_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0) @@ -26514,6 +32195,17 @@ define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB138_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0) @@ -26538,6 +32230,17 @@ define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB139_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB139_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_add_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0) @@ -26558,6 +32261,17 @@ define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind { ; 
RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB139_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB139_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0) @@ -26582,6 +32296,17 @@ define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB140_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: neg a1, a1 @@ -26598,6 +32323,17 @@ define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB140_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_sub_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: neg a1, a1 @@ -26618,6 +32354,17 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB141_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_sub_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: neg a1, a1 @@ -26640,6 +32387,17 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB141_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -26666,6 +32424,17 @@ define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB142_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB142_1 +; RV32I-ZALRSC-NEXT: # %bb.2: 
+; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_sub_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: neg a1, a1 @@ -26688,6 +32457,17 @@ define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB142_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB142_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -26714,6 +32494,17 @@ define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB143_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB143_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_sub_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: neg a1, a1 @@ -26736,6 +32527,17 @@ define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB143_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB143_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -26762,6 +32564,17 @@ define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB144_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_sub_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: neg a1, a1 @@ -26784,6 +32597,17 @@ define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB144_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -26810,6 +32634,17 @@ define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic: +; 
RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB145_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoand.w a0, a1, (a0) @@ -26825,6 +32660,17 @@ define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB145_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_and_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoand.w a0, a1, (a0) @@ -26844,6 +32690,17 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB146_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_and_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoand.w.aq a0, a1, (a0) @@ -26864,6 +32721,17 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB146_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.w.aq a0, a1, (a0) @@ -26888,6 +32756,17 @@ define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB147_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB147_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_and_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoand.w.rl a0, a1, (a0) @@ -26908,6 +32787,17 @@ define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB147_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB147_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; 
RV64IA-WMO-LABEL: atomicrmw_and_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.w.rl a0, a1, (a0) @@ -26932,6 +32822,17 @@ define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB148_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_and_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0) @@ -26952,6 +32853,17 @@ define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB148_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0) @@ -26976,6 +32888,17 @@ define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB149_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_and_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0) @@ -26996,6 +32919,17 @@ define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB149_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0) @@ -27020,6 +32954,18 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB150_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 @@ -27042,6 +32988,18 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ; 
RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB150_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 @@ -27200,6 +33158,18 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB151_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 @@ -27234,6 +33204,18 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB151_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 @@ -27432,6 +33414,18 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB152_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 @@ -27466,6 +33460,18 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB152_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 @@ 
-27664,6 +33670,18 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB153_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 @@ -27698,6 +33716,18 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB153_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 @@ -27896,6 +33926,18 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB154_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1 @@ -27918,6 +33960,18 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB154_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1 @@ -28112,6 +34166,17 @@ define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB155_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB155_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_or_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoor.w a0, a1, (a0) @@ -28127,6 
+34192,17 @@ define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB155_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB155_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_or_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoor.w a0, a1, (a0) @@ -28146,6 +34222,17 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB156_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB156_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_or_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoor.w.aq a0, a1, (a0) @@ -28166,6 +34253,17 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB156_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB156_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.w.aq a0, a1, (a0) @@ -28190,6 +34288,17 @@ define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB157_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB157_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_or_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoor.w.rl a0, a1, (a0) @@ -28210,6 +34319,17 @@ define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB157_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB157_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.w.rl a0, a1, (a0) @@ -28234,6 +34354,17 @@ define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB158_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; 
RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB158_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_or_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0) @@ -28254,6 +34385,17 @@ define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB158_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB158_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0) @@ -28278,6 +34420,17 @@ define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB159_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB159_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_or_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0) @@ -28298,6 +34451,17 @@ define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB159_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB159_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0) @@ -28322,6 +34486,17 @@ define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB160_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB160_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xor_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoxor.w a0, a1, (a0) @@ -28337,6 +34512,17 @@ define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB160_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB160_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_xor_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoxor.w a0, a1, (a0) @@ -28356,6 +34542,17 @@ define i32 
@atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB161_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB161_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xor_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoxor.w.aq a0, a1, (a0) @@ -28376,6 +34573,17 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB161_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB161_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.w.aq a0, a1, (a0) @@ -28400,6 +34608,17 @@ define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB162_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB162_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xor_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoxor.w.rl a0, a1, (a0) @@ -28420,6 +34639,17 @@ define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB162_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB162_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.w.rl a0, a1, (a0) @@ -28444,6 +34674,17 @@ define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB163_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB163_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xor_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0) @@ -28464,6 +34705,17 @@ define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB163_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: 
xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB163_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0) @@ -28488,6 +34740,17 @@ define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB164_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB164_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xor_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0) @@ -28508,6 +34771,17 @@ define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB164_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB164_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0) @@ -28558,6 +34832,21 @@ define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB165_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB165_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB165_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB165_3: # in Loop: Header=BB165_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB165_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_max_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amomax.w a0, a1, (a0) @@ -28602,6 +34891,22 @@ define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB165_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB165_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB165_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB165_3: # in Loop: Header=BB165_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB165_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_max_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomax.w a0, a1, (a0) @@ -28647,6 +34952,21 @@ define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; 
RV32I-ZALRSC-NEXT: .LBB166_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB166_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB166_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB166_3: # in Loop: Header=BB166_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB166_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_max_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomax.w.aq a0, a1, (a0) @@ -28696,6 +35016,22 @@ define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB166_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB166_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB166_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB166_3: # in Loop: Header=BB166_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB166_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.w.aq a0, a1, (a0) @@ -28746,6 +35082,21 @@ define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB167_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB167_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB167_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB167_3: # in Loop: Header=BB167_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB167_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_max_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomax.w.rl a0, a1, (a0) @@ -28795,6 +35146,22 @@ define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB167_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB167_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB167_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB167_3: # in Loop: Header=BB167_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB167_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.w.rl a0, a1, (a0) @@ -28845,6 +35212,21 @@ define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB168_1: # =>This Inner Loop Header: Depth=1 +; 
RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB168_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB168_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB168_3: # in Loop: Header=BB168_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB168_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_max_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0) @@ -28894,6 +35276,22 @@ define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB168_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB168_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB168_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB168_3: # in Loop: Header=BB168_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB168_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0) @@ -28944,6 +35342,21 @@ define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB169_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB169_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB169_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB169_3: # in Loop: Header=BB169_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB169_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_max_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0) @@ -28993,6 +35406,22 @@ define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB169_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB169_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB169_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB169_3: # in Loop: Header=BB169_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB169_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0) @@ -29043,6 +35472,21 @@ define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB170_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; 
RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB170_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB170_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB170_3: # in Loop: Header=BB170_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB170_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_min_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amomin.w a0, a1, (a0) @@ -29087,6 +35531,22 @@ define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB170_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB170_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB170_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB170_3: # in Loop: Header=BB170_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB170_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_min_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomin.w a0, a1, (a0) @@ -29132,6 +35592,21 @@ define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB171_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB171_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB171_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB171_3: # in Loop: Header=BB171_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB171_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_min_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomin.w.aq a0, a1, (a0) @@ -29181,6 +35656,22 @@ define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB171_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB171_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB171_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB171_3: # in Loop: Header=BB171_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB171_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.w.aq a0, a1, (a0) @@ -29231,6 +35722,21 @@ define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB172_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB172_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # 
in Loop: Header=BB172_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB172_3: # in Loop: Header=BB172_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB172_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_min_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomin.w.rl a0, a1, (a0) @@ -29280,6 +35786,22 @@ define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB172_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB172_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB172_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB172_3: # in Loop: Header=BB172_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB172_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.w.rl a0, a1, (a0) @@ -29330,6 +35852,21 @@ define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB173_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB173_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB173_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB173_3: # in Loop: Header=BB173_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB173_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_min_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0) @@ -29379,6 +35916,22 @@ define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB173_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB173_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB173_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB173_3: # in Loop: Header=BB173_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB173_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0) @@ -29429,6 +35982,21 @@ define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB174_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB174_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB174_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 
+; RV32I-ZALRSC-NEXT: .LBB174_3: # in Loop: Header=BB174_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB174_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_min_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0) @@ -29478,6 +36046,22 @@ define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB174_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB174_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB174_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB174_3: # in Loop: Header=BB174_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB174_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0) @@ -29528,6 +36112,21 @@ define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB175_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB175_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB175_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB175_3: # in Loop: Header=BB175_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB175_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amomaxu.w a0, a1, (a0) @@ -29572,6 +36171,22 @@ define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB175_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB175_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB175_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB175_3: # in Loop: Header=BB175_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB175_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_umax_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomaxu.w a0, a1, (a0) @@ -29617,6 +36232,21 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB176_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB176_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB176_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB176_3: # in Loop: Header=BB176_1 Depth=1 +; 
RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB176_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umax_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomaxu.w.aq a0, a1, (a0) @@ -29666,6 +36296,22 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB176_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB176_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB176_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB176_3: # in Loop: Header=BB176_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB176_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.w.aq a0, a1, (a0) @@ -29716,6 +36362,21 @@ define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB177_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB177_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB177_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB177_3: # in Loop: Header=BB177_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB177_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umax_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomaxu.w.rl a0, a1, (a0) @@ -29765,6 +36426,22 @@ define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB177_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB177_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB177_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB177_3: # in Loop: Header=BB177_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB177_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.w.rl a0, a1, (a0) @@ -29815,6 +36492,21 @@ define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB178_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB178_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB178_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB178_3: # in Loop: Header=BB178_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; 
RV32I-ZALRSC-NEXT: bnez a3, .LBB178_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umax_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0) @@ -29864,6 +36556,22 @@ define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB178_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB178_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB178_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB178_3: # in Loop: Header=BB178_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB178_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0) @@ -29914,6 +36622,21 @@ define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB179_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB179_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB179_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB179_3: # in Loop: Header=BB179_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB179_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umax_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0) @@ -29963,6 +36686,22 @@ define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB179_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB179_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB179_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB179_3: # in Loop: Header=BB179_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB179_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0) @@ -30013,6 +36752,21 @@ define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB180_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB180_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB180_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB180_3: # in Loop: Header=BB180_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, 
.LBB180_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amominu.w a0, a1, (a0) @@ -30057,6 +36811,22 @@ define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB180_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB180_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB180_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB180_3: # in Loop: Header=BB180_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB180_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_umin_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amominu.w a0, a1, (a0) @@ -30102,6 +36872,21 @@ define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB181_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB181_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB181_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB181_3: # in Loop: Header=BB181_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB181_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umin_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amominu.w.aq a0, a1, (a0) @@ -30151,6 +36936,22 @@ define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB181_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB181_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB181_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB181_3: # in Loop: Header=BB181_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB181_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.w.aq a0, a1, (a0) @@ -30201,6 +37002,21 @@ define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB182_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB182_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB182_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB182_3: # in Loop: Header=BB182_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB182_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; 
RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umin_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amominu.w.rl a0, a1, (a0) @@ -30250,6 +37066,22 @@ define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB182_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB182_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB182_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB182_3: # in Loop: Header=BB182_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB182_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.w.rl a0, a1, (a0) @@ -30300,6 +37132,21 @@ define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB183_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB183_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB183_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB183_3: # in Loop: Header=BB183_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB183_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umin_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0) @@ -30349,6 +37196,22 @@ define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB183_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB183_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB183_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB183_3: # in Loop: Header=BB183_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB183_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0) @@ -30399,6 +37262,21 @@ define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB184_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB184_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB184_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB184_3: # in Loop: Header=BB184_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB184_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; 
RV32IA-WMO-LABEL: atomicrmw_umin_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0) @@ -30448,6 +37326,22 @@ define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB184_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB184_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB184_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB184_3: # in Loop: Header=BB184_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB184_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0) @@ -30472,6 +37366,16 @@ define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30492,6 +37396,17 @@ define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB185_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB185_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_xchg_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoswap.d a0, a1, (a0) @@ -30511,6 +37426,16 @@ define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30531,6 +37456,17 @@ define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB186_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB186_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.d.aq a0, a1, (a0) @@ -30555,6 +37491,16 
@@ define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30575,6 +37521,17 @@ define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB187_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB187_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.d.rl a0, a1, (a0) @@ -30599,6 +37556,16 @@ define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30619,6 +37586,17 @@ define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB188_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB188_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.d.aqrl a0, a1, (a0) @@ -30643,6 +37621,16 @@ define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30663,6 +37651,17 @@ define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB189_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, 
.LBB189_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.d.aqrl a0, a1, (a0) @@ -30687,6 +37686,16 @@ define i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30707,6 +37716,17 @@ define i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB190_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB190_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_add_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoadd.d a0, a1, (a0) @@ -30726,6 +37746,16 @@ define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30746,6 +37776,17 @@ define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB191_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB191_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.d.aq a0, a1, (a0) @@ -30770,6 +37811,16 @@ define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30790,6 +37841,17 @@ define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_release: +; RV64I-ZALRSC: # 
%bb.0: +; RV64I-ZALRSC-NEXT: .LBB192_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB192_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.d.rl a0, a1, (a0) @@ -30814,6 +37876,16 @@ define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30834,6 +37906,17 @@ define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB193_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB193_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.d.aqrl a0, a1, (a0) @@ -30858,6 +37941,16 @@ define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30878,6 +37971,17 @@ define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB194_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB194_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.d.aqrl a0, a1, (a0) @@ -30902,6 +38006,16 @@ define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_monotonic: ; RV32IA: # 
%bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30922,6 +38036,17 @@ define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB195_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB195_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_sub_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: neg a1, a1 @@ -30942,6 +38067,16 @@ define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30962,6 +38097,17 @@ define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB196_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB196_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -30988,6 +38134,16 @@ define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31008,6 +38164,17 @@ define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB197_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB197_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -31034,6 +38201,16 @@ define i64 @atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; 
RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31054,6 +38231,17 @@ define i64 @atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB198_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB198_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -31080,6 +38268,16 @@ define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31100,6 +38298,17 @@ define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB199_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB199_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -31126,6 +38335,16 @@ define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31146,6 +38365,17 @@ define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB200_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB200_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_and_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoand.d a0, a1, (a0) @@ -31165,6 +38395,16 @@ define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_acquire: +; 
RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31185,6 +38425,17 @@ define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB201_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB201_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.d.aq a0, a1, (a0) @@ -31209,6 +38460,16 @@ define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31229,6 +38490,17 @@ define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB202_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB202_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.d.rl a0, a1, (a0) @@ -31253,6 +38525,16 @@ define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31273,6 +38555,17 @@ define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB203_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB203_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; 
RV64IA-WMO-NEXT: amoand.d.aqrl a0, a1, (a0) @@ -31297,6 +38590,16 @@ define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31317,6 +38620,17 @@ define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB204_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB204_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.d.aqrl a0, a1, (a0) @@ -31341,6 +38655,16 @@ define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_nand_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31361,6 +38685,18 @@ define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB205_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB205_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i64_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: .LBB205_1: # =>This Inner Loop Header: Depth=1 @@ -31453,6 +38789,16 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_nand_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31473,6 +38819,18 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB206_1: # =>This Inner Loop Header: 
+; RV64I-ZALRSC-NEXT: .LBB206_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB206_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i64_acquire:
 ; RV64IA-WMO-NOZACAS: # %bb.0:
 ; RV64IA-WMO-NOZACAS-NEXT: .LBB206_1: # =>This Inner Loop Header: Depth=1
@@ -31591,6 +38949,16 @@ define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 3
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_nand_i64_release:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -16
@@ -31611,6 +38979,18 @@ define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB207_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB207_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i64_release:
 ; RV64IA-WMO-NOZACAS: # %bb.0:
 ; RV64IA-WMO-NOZACAS-NEXT: .LBB207_1: # =>This Inner Loop Header: Depth=1
@@ -31729,6 +39109,16 @@ define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 4
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_nand_i64_acq_rel:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -16
@@ -31749,6 +39139,18 @@ define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB208_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB208_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i64_acq_rel:
 ; RV64IA-WMO-NOZACAS: # %bb.0:
 ; RV64IA-WMO-NOZACAS-NEXT: .LBB208_1: # =>This Inner Loop Header: Depth=1
@@ -31867,6 +39269,16 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 5
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_nand_i64_seq_cst:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -16
@@ -31887,6 +39299,18 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB209_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB209_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i64_seq_cst:
 ; RV64IA-NOZACAS: # %bb.0:
 ; RV64IA-NOZACAS-NEXT: .LBB209_1: # =>This Inner Loop Header: Depth=1
@@ -31997,6 +39421,16 @@ define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_or_i64_monotonic:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -16
@@ -32017,6 +39451,17 @@ define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB210_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB210_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-LABEL: atomicrmw_or_i64_monotonic:
 ; RV64IA: # %bb.0:
 ; RV64IA-NEXT: amoor.d a0, a1, (a0)
@@ -32036,6 +39481,16 @@ define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 2
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_or_i64_acquire:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -16
@@ -32056,6 +39511,17 @@ define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB211_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB211_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_or_i64_acquire:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amoor.d.aq a0, a1, (a0)
@@ -32080,6 +39546,16 @@ define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 3
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_or_i64_release:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -16
@@ -32100,6 +39576,17 @@ define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB212_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB212_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_or_i64_release:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amoor.d.rl a0, a1, (a0)
@@ -32124,6 +39611,16 @@ define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 4
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_or_i64_acq_rel:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -16
@@ -32144,6 +39641,17 @@ define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB213_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB213_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_or_i64_acq_rel:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amoor.d.aqrl a0, a1, (a0)
@@ -32168,6 +39676,16 @@ define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 5
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_or_i64_seq_cst:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -16
@@ -32188,6 +39706,17 @@ define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB214_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB214_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_or_i64_seq_cst:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amoor.d.aqrl a0, a1, (a0)
@@ -32212,6 +39741,16 @@ define i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_xor_i64_monotonic:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -16
@@ -32232,6 +39771,17 @@ define i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB215_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB215_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-LABEL: atomicrmw_xor_i64_monotonic:
 ; RV64IA: # %bb.0:
 ; RV64IA-NEXT: amoxor.d a0, a1, (a0)
@@ -32251,6 +39801,16 @@ define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 2
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_xor_i64_acquire:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -16
@@ -32271,6 +39831,17 @@ define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB216_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB216_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_xor_i64_acquire:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amoxor.d.aq a0, a1, (a0)
@@ -32295,6 +39866,16 @@ define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 3
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_xor_i64_release:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -16
@@ -32315,6 +39896,17 @@ define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB217_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB217_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_xor_i64_release:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amoxor.d.rl a0, a1, (a0)
@@ -32339,6 +39931,16 @@ define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 4
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_xor_i64_acq_rel:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -16
@@ -32359,6 +39961,17 @@ define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB218_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB218_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_xor_i64_acq_rel:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amoxor.d.aqrl a0, a1, (a0)
@@ -32383,6 +39996,16 @@ define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 5
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_xor_i64_seq_cst:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -16
@@ -32403,6 +40026,17 @@ define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB219_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB219_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_xor_i64_seq_cst:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amoxor.d.aqrl a0, a1, (a0)
@@ -32471,6 +40105,60 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB220_2
+; RV32I-ZALRSC-NEXT: .LBB220_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a4, 0
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB220_7
+; RV32I-ZALRSC-NEXT: .LBB220_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB220_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB220_5
+; RV32I-ZALRSC-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB220_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB220_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB220_1
+; RV32I-ZALRSC-NEXT: .LBB220_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_max_i64_monotonic:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -32561,6 +40249,21 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB220_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB220_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB220_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB220_3: # in Loop: Header=BB220_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB220_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-LABEL: atomicrmw_max_i64_monotonic:
 ; RV64IA: # %bb.0:
 ; RV64IA-NEXT: amomax.d a0, a1, (a0)
@@ -32624,6 +40327,60 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB221_2
+; RV32I-ZALRSC-NEXT: .LBB221_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 2
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB221_7
+; RV32I-ZALRSC-NEXT: .LBB221_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB221_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB221_5
+; RV32I-ZALRSC-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB221_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB221_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB221_1
+; RV32I-ZALRSC-NEXT: .LBB221_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_max_i64_acquire:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -32714,6 +40471,21 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB221_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB221_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB221_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB221_3: # in Loop: Header=BB221_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB221_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_max_i64_acquire:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amomax.d.aq a0, a1, (a0)
@@ -32782,6 +40554,60 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB222_2
+; RV32I-ZALRSC-NEXT: .LBB222_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 3
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB222_7
+; RV32I-ZALRSC-NEXT: .LBB222_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB222_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB222_5
+; RV32I-ZALRSC-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB222_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB222_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB222_1
+; RV32I-ZALRSC-NEXT: .LBB222_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_max_i64_release:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -32872,6 +40698,21 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB222_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB222_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB222_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB222_3: # in Loop: Header=BB222_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB222_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_max_i64_release:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amomax.d.rl a0, a1, (a0)
@@ -32940,6 +40781,60 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB223_2
+; RV32I-ZALRSC-NEXT: .LBB223_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 4
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB223_7
+; RV32I-ZALRSC-NEXT: .LBB223_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB223_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB223_5
+; RV32I-ZALRSC-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB223_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB223_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB223_1
+; RV32I-ZALRSC-NEXT: .LBB223_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_max_i64_acq_rel:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -33030,6 +40925,21 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB223_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB223_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB223_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB223_3: # in Loop: Header=BB223_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB223_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_max_i64_acq_rel:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amomax.d.aqrl a0, a1, (a0)
@@ -33098,6 +41008,60 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB224_2
+; RV32I-ZALRSC-NEXT: .LBB224_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 5
+; RV32I-ZALRSC-NEXT: li a5, 5
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB224_7
+; RV32I-ZALRSC-NEXT: .LBB224_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB224_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB224_5
+; RV32I-ZALRSC-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB224_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB224_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB224_1
+; RV32I-ZALRSC-NEXT: .LBB224_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_max_i64_seq_cst:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -33188,6 +41152,21 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB224_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB224_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB224_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB224_3: # in Loop: Header=BB224_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB224_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_max_i64_seq_cst:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amomax.d.aqrl a0, a1, (a0)
@@ -33256,6 +41235,60 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB225_2
+; RV32I-ZALRSC-NEXT: .LBB225_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a4, 0
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB225_7
+; RV32I-ZALRSC-NEXT: .LBB225_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB225_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB225_5
+; RV32I-ZALRSC-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB225_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB225_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB225_1
+; RV32I-ZALRSC-NEXT: .LBB225_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_min_i64_monotonic:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -33346,6 +41379,21 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB225_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB225_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB225_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB225_3: # in Loop: Header=BB225_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB225_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-LABEL: atomicrmw_min_i64_monotonic:
 ; RV64IA: # %bb.0:
 ; RV64IA-NEXT: amomin.d a0, a1, (a0)
@@ -33409,6 +41457,60 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB226_2
+; RV32I-ZALRSC-NEXT: .LBB226_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 2
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB226_7
+; RV32I-ZALRSC-NEXT: .LBB226_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB226_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB226_5
+; RV32I-ZALRSC-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB226_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB226_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB226_1
+; RV32I-ZALRSC-NEXT: .LBB226_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_min_i64_acquire:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -33499,6 +41601,21 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB226_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB226_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB226_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB226_3: # in Loop: Header=BB226_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB226_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_min_i64_acquire:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amomin.d.aq a0, a1, (a0)
@@ -33567,6 +41684,60 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB227_2
+; RV32I-ZALRSC-NEXT: .LBB227_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 3
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB227_7
+; RV32I-ZALRSC-NEXT: .LBB227_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB227_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB227_5
+; RV32I-ZALRSC-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB227_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB227_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB227_1
+; RV32I-ZALRSC-NEXT: .LBB227_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_min_i64_release:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -33657,6 +41828,21 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB227_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB227_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB227_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB227_3: # in Loop: Header=BB227_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB227_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_min_i64_release:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amomin.d.rl a0, a1, (a0)
@@ -33725,6 +41911,60 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB228_2
+; RV32I-ZALRSC-NEXT: .LBB228_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 4
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB228_7
+; RV32I-ZALRSC-NEXT: .LBB228_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB228_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB228_5
+; RV32I-ZALRSC-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB228_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB228_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB228_1
+; RV32I-ZALRSC-NEXT: .LBB228_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_min_i64_acq_rel:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -33815,6 +42055,21 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB228_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB228_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB228_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB228_3: # in Loop: Header=BB228_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB228_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_min_i64_acq_rel:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amomin.d.aqrl a0, a1, (a0)
@@ -33883,6 +42138,60 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB229_2
+; RV32I-ZALRSC-NEXT: .LBB229_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 5
+; RV32I-ZALRSC-NEXT: li a5, 5
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB229_7
+; RV32I-ZALRSC-NEXT: .LBB229_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB229_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB229_5
+; RV32I-ZALRSC-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB229_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB229_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB229_1
+; RV32I-ZALRSC-NEXT: .LBB229_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_min_i64_seq_cst:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -33973,6 +42282,21 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB229_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB229_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB229_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB229_3: # in Loop: Header=BB229_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB229_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_min_i64_seq_cst:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amomin.d.aqrl a0, a1, (a0)
@@ -34041,6 +42365,60 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB230_2
+; RV32I-ZALRSC-NEXT: .LBB230_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a4, 0
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB230_7
+; RV32I-ZALRSC-NEXT: .LBB230_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB230_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB230_5
+; RV32I-ZALRSC-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB230_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB230_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB230_1
+; RV32I-ZALRSC-NEXT: .LBB230_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_umax_i64_monotonic:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -34131,6 +42509,21 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB230_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB230_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB230_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB230_3: # in Loop: Header=BB230_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB230_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-LABEL: atomicrmw_umax_i64_monotonic:
 ; RV64IA: # %bb.0:
 ; RV64IA-NEXT: amomaxu.d a0, a1, (a0)
@@ -34194,6 +42587,60 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB231_2
+; RV32I-ZALRSC-NEXT: .LBB231_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 2
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB231_7
+; RV32I-ZALRSC-NEXT: .LBB231_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB231_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB231_5
+; RV32I-ZALRSC-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB231_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB231_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB231_1
+; RV32I-ZALRSC-NEXT: .LBB231_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_umax_i64_acquire:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -34284,6 +42731,21 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB231_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB231_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB231_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB231_3: # in Loop: Header=BB231_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB231_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_umax_i64_acquire:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amomaxu.d.aq a0, a1, (a0)
@@ -34352,6 +42814,60 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB232_2
+; RV32I-ZALRSC-NEXT: .LBB232_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 3
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB232_7
+; RV32I-ZALRSC-NEXT: .LBB232_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB232_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB232_5
+; RV32I-ZALRSC-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB232_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB232_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB232_1
+; RV32I-ZALRSC-NEXT: .LBB232_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_umax_i64_release:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -34442,6 +42958,21 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
@@ -34352,6 +42814,60 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB232_2
+; RV32I-ZALRSC-NEXT: .LBB232_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 3
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB232_7
+; RV32I-ZALRSC-NEXT: .LBB232_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB232_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB232_5
+; RV32I-ZALRSC-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB232_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB232_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB232_1
+; RV32I-ZALRSC-NEXT: .LBB232_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_umax_i64_release:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -34442,6 +42958,21 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB232_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB232_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB232_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB232_3: # in Loop: Header=BB232_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB232_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_umax_i64_release:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amomaxu.d.rl a0, a1, (a0)
@@ -34510,6 +43041,60 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB233_2
+; RV32I-ZALRSC-NEXT: .LBB233_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 4
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB233_7
+; RV32I-ZALRSC-NEXT: .LBB233_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB233_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB233_5
+; RV32I-ZALRSC-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB233_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB233_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB233_1
+; RV32I-ZALRSC-NEXT: .LBB233_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_umax_i64_acq_rel:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -34600,6 +43185,21 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB233_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+;
RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB233_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB233_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB233_3: # in Loop: Header=BB233_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB233_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.d.aqrl a0, a1, (a0) @@ -34668,6 +43268,60 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB234_2 +; RV32I-ZALRSC-NEXT: .LBB234_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 5 +; RV32I-ZALRSC-NEXT: li a5, 5 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB234_7 +; RV32I-ZALRSC-NEXT: .LBB234_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB234_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB234_5 +; RV32I-ZALRSC-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB234_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB234_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB234_1 +; RV32I-ZALRSC-NEXT: .LBB234_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34758,6 +43412,21 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB234_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB234_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB234_1 Depth=1 +; 
RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB234_3: # in Loop: Header=BB234_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB234_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.d.aqrl a0, a1, (a0) @@ -34826,6 +43495,60 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB235_2 +; RV32I-ZALRSC-NEXT: .LBB235_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB235_7 +; RV32I-ZALRSC-NEXT: .LBB235_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB235_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB235_5 +; RV32I-ZALRSC-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB235_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB235_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB235_1 +; RV32I-ZALRSC-NEXT: .LBB235_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34916,6 +43639,21 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB235_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB235_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB235_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB235_3: # in Loop: Header=BB235_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; 
RV64I-ZALRSC-NEXT: bnez a3, .LBB235_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_umin_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amominu.d a0, a1, (a0) @@ -34979,6 +43717,60 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB236_2 +; RV32I-ZALRSC-NEXT: .LBB236_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 2 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB236_7 +; RV32I-ZALRSC-NEXT: .LBB236_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB236_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB236_5 +; RV32I-ZALRSC-NEXT: .LBB236_4: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB236_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB236_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB236_1 +; RV32I-ZALRSC-NEXT: .LBB236_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -35069,6 +43861,21 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB236_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB236_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB236_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB236_3: # in Loop: Header=BB236_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB236_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i64_acquire: ; 
RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.d.aq a0, a1, (a0) @@ -35137,6 +43944,60 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB237_2 +; RV32I-ZALRSC-NEXT: .LBB237_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 3 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB237_7 +; RV32I-ZALRSC-NEXT: .LBB237_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB237_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB237_5 +; RV32I-ZALRSC-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB237_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB237_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB237_1 +; RV32I-ZALRSC-NEXT: .LBB237_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -35227,6 +44088,21 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB237_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB237_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB237_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB237_3: # in Loop: Header=BB237_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB237_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.d.rl a0, a1, (a0) @@ -35295,6 +44171,60 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; 
RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB238_2 +; RV32I-ZALRSC-NEXT: .LBB238_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 4 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB238_7 +; RV32I-ZALRSC-NEXT: .LBB238_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB238_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB238_5 +; RV32I-ZALRSC-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB238_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB238_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB238_1 +; RV32I-ZALRSC-NEXT: .LBB238_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -35385,6 +44315,21 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB238_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB238_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB238_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB238_3: # in Loop: Header=BB238_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB238_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.d.aqrl a0, a1, (a0) @@ -35453,6 +44398,60 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, 
-32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB239_2
+; RV32I-ZALRSC-NEXT: .LBB239_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 5
+; RV32I-ZALRSC-NEXT: li a5, 5
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB239_7
+; RV32I-ZALRSC-NEXT: .LBB239_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB239_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB239_5
+; RV32I-ZALRSC-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB239_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB239_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB239_1
+; RV32I-ZALRSC-NEXT: .LBB239_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-LABEL: atomicrmw_umin_i64_seq_cst:
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: addi sp, sp, -32
@@ -35543,6 +44542,21 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 32
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB239_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB239_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB239_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB239_3: # in Loop: Header=BB239_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB239_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-WMO-LABEL: atomicrmw_umin_i64_seq_cst:
 ; RV64IA-WMO: # %bb.0:
 ; RV64IA-WMO-NEXT: amominu.d.aqrl a0, a1, (a0)
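;
; Note on the i64 cases above: Zalrsc only provides LR/SC at the native
; register width, so on RV64 the i64 atomicrmw inlines an lr.d/sc.d loop,
; while on RV32 (which has no lr.d/sc.d) it is expanded to a loop around
; the __atomic_compare_exchange_8 libcall, with the success and failure
; orderings passed as the trailing immediates (0 monotonic, 2 acquire,
; 3 release, 4 acq_rel, 5 seq_cst), as the li a4/li a5 pairs show. A
; minimal standalone reproducer, assuming an llc built from this tree
; (the function name is illustrative, not from the patch):
;
; RUN: llc -mtriple=riscv64 -mattr=+zalrsc < %s
; RUN: llc -mtriple=riscv32 -mattr=+zalrsc < %s
define i64 @umax_i64_example(ptr %p, i64 %v) nounwind {
  %old = atomicrmw umax ptr %p, i64 %v seq_cst
  ret i64 %old
}
;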
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index 7d29ac9..7fe5fa7 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -5,12 +5,16 @@
 ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS %s
 ; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \
 ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS %s
+; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32I-ZALRSC %s
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
 ; RUN: | FileCheck -check-prefix=RV64I %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
 ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \
 ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS %s
+; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64I-ZALRSC %s
 
 define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
 ; RV32I-LABEL: atomic_load_i8_unordered:
@@ -30,6 +34,11 @@ define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
 ; RV32IA-NEXT: lb a0, 0(a0)
 ; RV32IA-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomic_load_i8_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lb a0, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV64I-LABEL: atomic_load_i8_unordered:
 ; RV64I: # %bb.0:
 ; RV64I-NEXT: addi sp, sp, -16
@@ -46,6 +55,11 @@ define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
 ; RV64IA: # %bb.0:
 ; RV64IA-NEXT: lb a0, 0(a0)
 ; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomic_load_i8_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lb a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
 %1 = load atomic i8, ptr %a unordered, align 1
 ret i8 %1
 }
@@ -68,6 +82,11 @@ define signext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
 ; RV32IA-NEXT: lh a0, 0(a0)
 ; RV32IA-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomic_load_i16_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lh a0, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV64I-LABEL: atomic_load_i16_unordered:
 ; RV64I: # %bb.0:
 ; RV64I-NEXT: addi sp, sp, -16
@@ -84,6 +103,11 @@ define signext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
 ; RV64IA: # %bb.0:
 ; RV64IA-NEXT: lh a0, 0(a0)
 ; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomic_load_i16_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lh a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
 %1 = load atomic i16, ptr %a unordered, align 2
 ret i16 %1
 }
@@ -104,6 +128,11 @@ define signext i32 @atomic_load_i32_unordered(ptr %a) nounwind {
 ; RV32IA-NEXT: lw a0, 0(a0)
 ; RV32IA-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomic_load_i32_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lw a0, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV64I-LABEL: atomic_load_i32_unordered:
 ; RV64I: # %bb.0:
 ; RV64I-NEXT: addi sp, sp, -16
@@ -119,6 +148,11 @@ define signext i32 @atomic_load_i32_unordered(ptr %a) nounwind {
 ; RV64IA: # %bb.0:
 ; RV64IA-NEXT: lw a0, 0(a0)
 ; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomic_load_i32_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lw a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
 %1 = load atomic i32, ptr %a unordered, align 4
 ret i32 %1
 }
@@ -159,6 +193,28 @@ define signext i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32IA-NEXT: srai a0, a0, 24
 ; RV32IA-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: mv a5, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB3_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV64I-LABEL: atomicrmw_xchg_i8_monotonic:
 ; RV64I: # %bb.0:
 ; RV64I-NEXT: addi sp, sp, -16
@@ -192,6 +248,28 @@ define signext i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-NEXT: slli a0, a0, 56
 ; RV64IA-NEXT: srai a0, a0, 56
 ; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: mv a5, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB3_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
 %1 = atomicrmw xchg ptr %a, i8 %b monotonic
 ret i8 %1
 }
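;
; Sketch, not from the patch: with Zalrsc there is no AMO for subword
; types, so i8/i16 atomicrmw is lowered via LLVM's masked-atomic
; expansion on the containing 32-bit word, as the xchg checks above
; show. The pointer is word-aligned (andi a2, a0, -4), the bit offset
; derived (slli a0, a0, 3), the operand and a 0xff mask shifted into
; place, and the loop runs on lr.w/sc.w, merging with
;   new = old ^ ((old ^ shifted_b) & mask)
; so only the addressed byte changes. The IR that produces this block
; is just the plain atomicrmw (function name illustrative):
;
define signext i8 @xchg_i8_example(ptr %p, i8 %v) nounwind {
  %old = atomicrmw xchg ptr %p, i8 %v monotonic
  ret i8 %old
}
;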
@@ -231,6 +309,28 @@ define signext i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32IA-NEXT: srai a0, a0, 24
 ; RV32IA-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB4_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV64I-LABEL: atomicrmw_add_i8_monotonic:
 ; RV64I: # %bb.0:
 ; RV64I-NEXT: addi sp, sp, -16
@@ -264,6 +364,28 @@ define signext i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-NEXT: slli a0, a0, 56
 ; RV64IA-NEXT: srai a0, a0, 56
 ; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB4_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
 %1 = atomicrmw add ptr %a, i8 %b monotonic
 ret i8 %1
 }
@@ -303,6 +425,28 @@ define signext i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) 
nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB5_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -336,6 +480,28 @@ define signext i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB5_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i8 %b monotonic ret i8 %1 } @@ -369,6 +535,27 @@ define signext i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB6_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -396,6 +583,27 @@ define signext i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB6_1: # 
=>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB6_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw and ptr %a, i8 %b monotonic ret i8 %1 } @@ -436,6 +644,29 @@ define signext i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB7_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -470,6 +701,29 @@ define signext i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB7_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw nand ptr %a, i8 %b monotonic ret i8 %1 } @@ -499,6 +753,23 @@ define signext i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB8_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -522,6 +793,23 @@ define signext i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; 
RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB8_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw or ptr %a, i8 %b monotonic ret i8 %1 } @@ -551,6 +839,23 @@ define signext i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB9_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -574,6 +879,23 @@ define signext i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB9_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xor ptr %a, i8 %b monotonic ret i8 %1 } @@ -653,6 +975,37 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB10_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, 
.LBB10_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -726,6 +1079,37 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB10_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB10_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw max ptr %a, i8 %b monotonic ret i8 %1 } @@ -805,6 +1189,37 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB11_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB11_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -878,6 +1293,37 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; 
RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB11_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB11_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw min ptr %a, i8 %b monotonic ret i8 %1 } @@ -950,6 +1396,32 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB12_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB12_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -1016,6 +1488,32 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB12_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB12_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umax ptr %a, i8 %b monotonic ret i8 %1 } @@ -1088,6 +1586,32 @@ define signext i8 
@atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB13_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB13_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -1154,6 +1678,32 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB13_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB13_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umin ptr %a, i8 %b monotonic ret i8 %1 } @@ -1194,6 +1744,29 @@ define signext i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB14_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1228,6 +1801,29 @@ 
define signext i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB14_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xchg ptr %a, i16 %b monotonic ret i16 %1 } @@ -1268,6 +1864,29 @@ define signext i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: add a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB15_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1302,6 +1921,29 @@ define signext i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: add a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB15_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw add ptr %a, i16 %b monotonic ret i16 %1 } @@ -1342,6 +1984,29 @@ define signext i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; 
RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB16_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1376,6 +2041,29 @@ define signext i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB16_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i16 %b monotonic ret i16 %1 } @@ -1410,6 +2098,28 @@ define signext i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: not a3, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB17_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1438,6 +2148,28 @@ define signext i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: not a3, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, 
(a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB17_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw and ptr %a, i16 %b monotonic ret i16 %1 } @@ -1479,6 +2211,30 @@ define signext i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB18_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1514,6 +2270,30 @@ define signext i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB18_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw nand ptr %a, i16 %b monotonic ret i16 %1 } @@ -1544,6 +2324,24 @@ define signext i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB19_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1568,6 +2366,24 @@ define signext i16 
@atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB19_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw or ptr %a, i16 %b monotonic ret i16 %1 } @@ -1598,6 +2414,24 @@ define signext i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB20_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1622,6 +2456,24 @@ define signext i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB20_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xor ptr %a, i16 %b monotonic ret i16 %1 } @@ -1703,6 +2555,39 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: 
bge a7, a1, .LBB21_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB21_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -1778,6 +2663,39 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB21_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB21_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw max ptr %a, i16 %b monotonic ret i16 %1 } @@ -1859,6 +2777,39 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB22_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB22_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: 
atomicrmw_min_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -1934,6 +2885,39 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB22_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB22_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw min ptr %a, i16 %b monotonic ret i16 %1 } @@ -2011,6 +2995,33 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB23_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB23_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2082,6 +3093,33 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; 
RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB23_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB23_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umax ptr %a, i16 %b monotonic ret i16 %1 } @@ -2159,6 +3197,33 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB24_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB24_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2230,6 +3295,33 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB24_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB24_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umin ptr %a, i16 %b monotonic ret i16 %1 } @@ -2250,6 +3342,17 @@ define signext i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoswap.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; 
RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB25_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2265,6 +3368,17 @@ define signext i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoswap.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB25_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xchg ptr %a, i32 %b monotonic ret i32 %1 } @@ -2285,6 +3399,17 @@ define signext i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoadd.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB26_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2300,6 +3425,17 @@ define signext i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoadd.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB26_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw add ptr %a, i32 %b monotonic ret i32 %1 } @@ -2321,6 +3457,17 @@ define signext i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoadd.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB27_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2337,6 +3484,17 @@ define signext i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA-NEXT: neg a1, a1 ; RV64IA-NEXT: amoadd.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB27_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i32 %b monotonic ret i32 %1 } @@ -2357,6 +3515,17 @@ define signext i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ; 
RV32IA-NEXT: amoand.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB28_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2372,6 +3541,17 @@ define signext i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoand.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB28_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw and ptr %a, i32 %b monotonic ret i32 %1 } @@ -2413,6 +3593,18 @@ define signext i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB29_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2449,6 +3641,18 @@ define signext i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA-ZACAS-NEXT: bne a0, a3, .LBB29_1 ; RV64IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB29_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw nand ptr %a, i32 %b monotonic ret i32 %1 } @@ -2469,6 +3673,17 @@ define signext i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoor.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB30_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2484,6 +3699,17 @@ define signext i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoor.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; 
RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB30_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw or ptr %a, i32 %b monotonic ret i32 %1 } @@ -2504,6 +3730,17 @@ define signext i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoxor.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB31_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2519,6 +3756,17 @@ define signext i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoxor.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB31_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xor ptr %a, i32 %b monotonic ret i32 %1 } @@ -2565,6 +3813,21 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amomax.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB32_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB32_3: # in Loop: Header=BB32_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB32_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2608,6 +3871,22 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomax.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB32_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB32_3: # in Loop: Header=BB32_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB32_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw max ptr %a, i32 %b monotonic ret i32 %1 } @@ -2654,6 +3933,21 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amomin.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; 
RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB33_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB33_3: # in Loop: Header=BB33_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB33_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2697,6 +3991,22 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomin.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB33_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB33_3: # in Loop: Header=BB33_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB33_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw min ptr %a, i32 %b monotonic ret i32 %1 } @@ -2743,6 +4053,21 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amomaxu.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB34_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB34_3: # in Loop: Header=BB34_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB34_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2786,6 +4111,22 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomaxu.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB34_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB34_3: # in Loop: Header=BB34_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB34_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umax ptr %a, i32 %b monotonic ret i32 %1 } @@ -2832,6 +4173,21 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amominu.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB35_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 +; 
RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB35_3: # in Loop: Header=BB35_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB35_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2875,6 +4231,22 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amominu.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB35_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB35_3: # in Loop: Header=BB35_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB35_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umin ptr %a, i32 %b monotonic ret i32 %1 } @@ -2900,6 +4272,16 @@ define signext i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2914,6 +4296,17 @@ define signext i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoswap.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB36_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xchg ptr %a, i64 %b monotonic ret i64 %1 } @@ -2939,6 +4332,16 @@ define signext i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2953,6 +4356,17 @@ define signext i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoadd.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, 
.LBB37_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw add ptr %a, i64 %b monotonic ret i64 %1 } @@ -2978,6 +4392,16 @@ define signext i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2993,6 +4417,17 @@ define signext i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA-NEXT: neg a1, a1 ; RV64IA-NEXT: amoadd.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB38_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i64 %b monotonic ret i64 %1 } @@ -3018,6 +4453,16 @@ define signext i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3032,6 +4477,17 @@ define signext i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoand.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB39_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw and ptr %a, i64 %b monotonic ret i64 %1 } @@ -3057,6 +4513,16 @@ define signext i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3092,6 +4558,18 @@ define signext i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA-ZACAS-NEXT: bne a0, a3, .LBB40_1 ; RV64IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic: +; 
RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB40_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw nand ptr %a, i64 %b monotonic ret i64 %1 } @@ -3117,6 +4595,16 @@ define signext i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3131,6 +4619,17 @@ define signext i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoor.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB41_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw or ptr %a, i64 %b monotonic ret i64 %1 } @@ -3156,6 +4655,16 @@ define signext i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3170,6 +4679,17 @@ define signext i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoxor.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB42_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xor ptr %a, i64 %b monotonic ret i64 %1 } @@ -3283,6 +4803,60 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 32 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; 
RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB43_2 +; RV32I-ZALRSC-NEXT: .LBB43_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB43_7 +; RV32I-ZALRSC-NEXT: .LBB43_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB43_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB43_5 +; RV32I-ZALRSC-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB43_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB43_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB43_1 +; RV32I-ZALRSC-NEXT: .LBB43_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -3323,6 +4897,21 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomax.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB43_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB43_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB43_3: # in Loop: Header=BB43_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB43_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw max ptr %a, i64 %b monotonic ret i64 %1 } @@ -3436,6 +5025,60 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 32 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB44_2 +; RV32I-ZALRSC-NEXT: .LBB44_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; 
RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB44_7 +; RV32I-ZALRSC-NEXT: .LBB44_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB44_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB44_5 +; RV32I-ZALRSC-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB44_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB44_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB44_1 +; RV32I-ZALRSC-NEXT: .LBB44_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -3476,6 +5119,21 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomin.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB44_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB44_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw min ptr %a, i64 %b monotonic ret i64 %1 } @@ -3589,6 +5247,60 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 32 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB45_2 +; RV32I-ZALRSC-NEXT: .LBB45_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call 
__atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB45_7 +; RV32I-ZALRSC-NEXT: .LBB45_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB45_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB45_5 +; RV32I-ZALRSC-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB45_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB45_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB45_1 +; RV32I-ZALRSC-NEXT: .LBB45_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -3629,6 +5341,21 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomaxu.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB45_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB45_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umax ptr %a, i64 %b monotonic ret i64 %1 } @@ -3742,6 +5469,60 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 32 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB46_2 +; RV32I-ZALRSC-NEXT: .LBB46_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB46_7 +; RV32I-ZALRSC-NEXT: .LBB46_2: # %atomicrmw.start +; 
RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB46_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB46_5 +; RV32I-ZALRSC-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB46_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB46_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB46_1 +; RV32I-ZALRSC-NEXT: .LBB46_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -3782,6 +5563,21 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amominu.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB46_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB46_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umin ptr %a, i64 %b monotonic ret i64 %1 } @@ -3827,6 +5623,32 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val0: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a4, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: zext.b a2, a2 +; RV32I-ZALRSC-NEXT: sll a4, a4, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a3) +; RV32I-ZALRSC-NEXT: and a6, a5, a4 +; RV32I-ZALRSC-NEXT: bne a6, a1, .LBB47_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a2 +; RV32I-ZALRSC-NEXT: and a6, a6, a4 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB47_1 +; RV32I-ZALRSC-NEXT: .LBB47_3: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i8_monotonic_monotonic_val0: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3866,6 +5688,32 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: 
ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val0: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a4, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: zext.b a2, a2 +; RV64I-ZALRSC-NEXT: sllw a4, a4, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a3) +; RV64I-ZALRSC-NEXT: and a6, a5, a4 +; RV64I-ZALRSC-NEXT: bne a6, a1, .LBB47_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a2 +; RV64I-ZALRSC-NEXT: and a6, a6, a4 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB47_1 +; RV64I-ZALRSC-NEXT: .LBB47_3: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic %2 = extractvalue { i8, i1 } %1, 0 ret i8 %2 @@ -3911,6 +5759,32 @@ define i1 @cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig ; RV32IA-NEXT: seqz a0, a1 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val1: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a4, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: zext.b a2, a2 +; RV32I-ZALRSC-NEXT: sll a4, a4, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a0, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a3) +; RV32I-ZALRSC-NEXT: and a5, a2, a4 +; RV32I-ZALRSC-NEXT: bne a5, a1, .LBB48_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a2, a0 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a2, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB48_1 +; RV32I-ZALRSC-NEXT: .LBB48_3: +; RV32I-ZALRSC-NEXT: and a2, a2, a4 +; RV32I-ZALRSC-NEXT: xor a1, a1, a2 +; RV32I-ZALRSC-NEXT: seqz a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i8_monotonic_monotonic_val1: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3949,6 +5823,32 @@ define i1 @cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig ; RV64IA-NEXT: xor a1, a1, a2 ; RV64IA-NEXT: seqz a0, a1 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val1: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a4, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: zext.b a2, a2 +; RV64I-ZALRSC-NEXT: sllw a4, a4, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a0, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a3) +; RV64I-ZALRSC-NEXT: and a5, a2, a4 +; RV64I-ZALRSC-NEXT: bne a5, a1, .LBB48_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a2, a0 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a2, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB48_1 +; RV64I-ZALRSC-NEXT: .LBB48_3: +; RV64I-ZALRSC-NEXT: and a2, a2, a4 +; RV64I-ZALRSC-NEXT: xor a1, a1, a2 +; RV64I-ZALRSC-NEXT: seqz a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i8 %cmp, i8 %val 
monotonic monotonic %2 = extractvalue { i8, i1 } %1, 1 ret i1 %2 @@ -3996,6 +5896,33 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext % ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val0: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a4, 16 +; RV32I-ZALRSC-NEXT: addi a4, a4, -1 +; RV32I-ZALRSC-NEXT: sll a5, a4, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a4 +; RV32I-ZALRSC-NEXT: and a2, a2, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a5 +; RV32I-ZALRSC-NEXT: bne a6, a1, .LBB49_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a4, a2 +; RV32I-ZALRSC-NEXT: and a6, a6, a5 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB49_1 +; RV32I-ZALRSC-NEXT: .LBB49_3: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i16_monotonic_monotonic_val0: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -4036,6 +5963,33 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext % ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val0: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a4, 16 +; RV64I-ZALRSC-NEXT: addi a4, a4, -1 +; RV64I-ZALRSC-NEXT: sllw a5, a4, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a4 +; RV64I-ZALRSC-NEXT: and a2, a2, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a5 +; RV64I-ZALRSC-NEXT: bne a6, a1, .LBB49_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a2 +; RV64I-ZALRSC-NEXT: and a6, a6, a5 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB49_1 +; RV64I-ZALRSC-NEXT: .LBB49_3: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic %2 = extractvalue { i16, i1 } %1, 0 ret i16 %2 @@ -4082,6 +6036,33 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16 ; RV32IA-NEXT: seqz a0, a1 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val1: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a4, 16 +; RV32I-ZALRSC-NEXT: addi a4, a4, -1 +; RV32I-ZALRSC-NEXT: sll a5, a4, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a4 +; RV32I-ZALRSC-NEXT: and a2, a2, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a0, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a3) +; RV32I-ZALRSC-NEXT: and a4, a2, a5 +; RV32I-ZALRSC-NEXT: bne a4, a1, .LBB50_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a4, a2, a0 +; 
RV32I-ZALRSC-NEXT: and a4, a4, a5 +; RV32I-ZALRSC-NEXT: xor a4, a2, a4 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a3) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB50_1 +; RV32I-ZALRSC-NEXT: .LBB50_3: +; RV32I-ZALRSC-NEXT: and a2, a2, a5 +; RV32I-ZALRSC-NEXT: xor a1, a1, a2 +; RV32I-ZALRSC-NEXT: seqz a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i16_monotonic_monotonic_val1: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -4121,6 +6102,33 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16 ; RV64IA-NEXT: xor a1, a1, a2 ; RV64IA-NEXT: seqz a0, a1 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val1: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a4, 16 +; RV64I-ZALRSC-NEXT: addi a4, a4, -1 +; RV64I-ZALRSC-NEXT: sllw a5, a4, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a4 +; RV64I-ZALRSC-NEXT: and a2, a2, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a0, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a3) +; RV64I-ZALRSC-NEXT: and a4, a2, a5 +; RV64I-ZALRSC-NEXT: bne a4, a1, .LBB50_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a4, a2, a0 +; RV64I-ZALRSC-NEXT: and a4, a4, a5 +; RV64I-ZALRSC-NEXT: xor a4, a2, a4 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a3) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB50_1 +; RV64I-ZALRSC-NEXT: .LBB50_3: +; RV64I-ZALRSC-NEXT: and a2, a2, a5 +; RV64I-ZALRSC-NEXT: xor a1, a1, a2 +; RV64I-ZALRSC-NEXT: seqz a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic %2 = extractvalue { i16, i1 } %1, 1 ret i1 %2 @@ -4159,6 +6167,18 @@ define signext i32 @cmpxchg_i32_monotonic_monotonic_val0(ptr %ptr, i32 signext % ; RV32IA-ZACAS-NEXT: mv a0, a1 ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val0: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) +; RV32I-ZALRSC-NEXT: bne a3, a1, .LBB51_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a4, a2, (a0) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB51_1 +; RV32I-ZALRSC-NEXT: .LBB51_3: +; RV32I-ZALRSC-NEXT: mv a0, a3 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic_val0: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -4190,6 +6210,18 @@ define signext i32 @cmpxchg_i32_monotonic_monotonic_val0(ptr %ptr, i32 signext % ; RV64IA-ZACAS-NEXT: amocas.w a1, a2, (a0) ; RV64IA-ZACAS-NEXT: mv a0, a1 ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val0: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) +; RV64I-ZALRSC-NEXT: bne a3, a1, .LBB51_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a4, a2, (a0) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB51_1 +; RV64I-ZALRSC-NEXT: .LBB51_3: +; RV64I-ZALRSC-NEXT: mv a0, a3 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic %2 = extractvalue { i32, i1 } %1, 0 ret i32 %2 @@ -4230,6 +6262,19 @@ define i1 @cmpxchg_i32_monotonic_monotonic_val1(ptr %ptr, i32 signext %cmp, i32 ; RV32IA-ZACAS-NEXT: seqz a0, a1 ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val1: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB52_1: # =>This Inner 
Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) +; RV32I-ZALRSC-NEXT: bne a3, a1, .LBB52_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a4, a2, (a0) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB52_1 +; RV32I-ZALRSC-NEXT: .LBB52_3: +; RV32I-ZALRSC-NEXT: xor a1, a3, a1 +; RV32I-ZALRSC-NEXT: seqz a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic_val1: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -4263,6 +6308,19 @@ define i1 @cmpxchg_i32_monotonic_monotonic_val1(ptr %ptr, i32 signext %cmp, i32 ; RV64IA-ZACAS-NEXT: xor a1, a3, a1 ; RV64IA-ZACAS-NEXT: seqz a0, a1 ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val1: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) +; RV64I-ZALRSC-NEXT: bne a3, a1, .LBB52_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a4, a2, (a0) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB52_1 +; RV64I-ZALRSC-NEXT: .LBB52_3: +; RV64I-ZALRSC-NEXT: xor a1, a3, a1 +; RV64I-ZALRSC-NEXT: seqz a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic %2 = extractvalue { i32, i1 } %1, 1 ret i1 %2 @@ -4304,6 +6362,27 @@ define signext i32 @atomicrmw_xchg_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB53_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB53_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB53_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB53_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4339,6 +6418,28 @@ define signext i32 @atomicrmw_xchg_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: li a2, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB53_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB53_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB53_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB53_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4391,6 +6492,27 @@ define signext i32 @atomicrmw_add_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; 
RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB54_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB54_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: add a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB54_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB54_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: addi a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4426,6 +6548,28 @@ define signext i32 @atomicrmw_add_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: addi a2, a0, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB54_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB54_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: add a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB54_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB54_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: addi a2, a1, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4479,6 +6623,27 @@ define signext i32 @atomicrmw_sub_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB55_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB55_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: sub a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB55_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB55_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: addi a2, a0, -1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4514,6 +6679,28 @@ define signext i32 @atomicrmw_sub_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: addi a2, a0, -1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB55_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB55_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB55_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; 
RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB55_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: addi a2, a1, -1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4567,6 +6754,27 @@ define signext i32 @atomicrmw_and_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB56_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB56_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: and a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB56_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB56_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: andi a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4602,6 +6810,28 @@ define signext i32 @atomicrmw_and_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: andi a2, a0, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB56_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB56_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: and a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB56_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB56_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4685,6 +6915,28 @@ define signext i32 @atomicrmw_nand_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-ZACAS-NEXT: mv a0, a1 ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB57_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB57_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: and a3, a0, a2 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB57_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB57_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: andi a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4750,6 +7002,28 @@ define signext i32 @atomicrmw_nand_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-ZACAS-NEXT: sw a2, 0(a0) ; RV64IA-ZACAS-NEXT: mv a0, a1 ; RV64IA-ZACAS-NEXT: ret +; +; 
RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: mv a1, a0 +; RV64I-ZALRSC-NEXT: beqz a2, .LBB57_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB57_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a0, (a1) +; RV64I-ZALRSC-NEXT: and a3, a0, a2 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB57_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB57_2: # %else +; RV64I-ZALRSC-NEXT: lw a0, 0(a1) +; RV64I-ZALRSC-NEXT: andi a2, a0, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a1) +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4803,6 +7077,27 @@ define signext i32 @atomicrmw_or_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind { ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB58_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB58_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: or a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB58_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB58_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: ori a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4838,6 +7133,28 @@ define signext i32 @atomicrmw_or_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind { ; RV64IA-NEXT: ori a2, a0, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB58_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB58_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: or a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB58_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB58_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: ori a2, a1, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4891,6 +7208,27 @@ define signext i32 @atomicrmw_xor_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB59_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB59_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: xor a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB59_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then 
+; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB59_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: xori a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4926,6 +7264,28 @@ define signext i32 @atomicrmw_xor_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: xori a2, a0, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB59_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB59_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB59_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB59_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: xori a2, a1, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5007,6 +7367,37 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB60_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB60_5: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a0 +; RV32I-ZALRSC-NEXT: bge a3, a2, .LBB60_7 +; RV32I-ZALRSC-NEXT: # %bb.6: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: .LBB60_7: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB60_5 +; RV32I-ZALRSC-NEXT: # %bb.8: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB60_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: mv a2, a0 +; RV32I-ZALRSC-NEXT: bgtz a0, .LBB60_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %else +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB60_4: # %else +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -5070,6 +7461,37 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: .LBB60_4: # %else ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: mv a1, a0 +; RV64I-ZALRSC-NEXT: beqz a2, .LBB60_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB60_5: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a0, (a1) +; RV64I-ZALRSC-NEXT: mv a3, a0 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB60_7 +; RV64I-ZALRSC-NEXT: # %bb.6: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB60_7: # 
%then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB60_5 +; RV64I-ZALRSC-NEXT: # %bb.8: # %then +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB60_2: # %else +; RV64I-ZALRSC-NEXT: lw a0, 0(a1) +; RV64I-ZALRSC-NEXT: mv a2, a0 +; RV64I-ZALRSC-NEXT: bgtz a0, .LBB60_4 +; RV64I-ZALRSC-NEXT: # %bb.3: # %else +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB60_4: # %else +; RV64I-ZALRSC-NEXT: sw a2, 0(a1) +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5155,6 +7577,37 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB61_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB61_5: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a0 +; RV32I-ZALRSC-NEXT: bge a2, a3, .LBB61_7 +; RV32I-ZALRSC-NEXT: # %bb.6: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: .LBB61_7: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB61_5 +; RV32I-ZALRSC-NEXT: # %bb.8: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB61_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: mv a2, a0 +; RV32I-ZALRSC-NEXT: blez a0, .LBB61_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %else +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB61_4: # %else +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -5220,6 +7673,37 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: .LBB61_4: # %else ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: mv a1, a0 +; RV64I-ZALRSC-NEXT: beqz a2, .LBB61_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB61_5: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a0, (a1) +; RV64I-ZALRSC-NEXT: mv a3, a0 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB61_7 +; RV64I-ZALRSC-NEXT: # %bb.6: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB61_7: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB61_5 +; RV64I-ZALRSC-NEXT: # %bb.8: # %then +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB61_2: # %else +; RV64I-ZALRSC-NEXT: lw a0, 0(a1) +; RV64I-ZALRSC-NEXT: mv a2, a0 +; RV64I-ZALRSC-NEXT: blez a0, .LBB61_4 +; RV64I-ZALRSC-NEXT: # %bb.3: # %else +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB61_4: # %else +; RV64I-ZALRSC-NEXT: sw a2, 0(a1) +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5290,6 +7774,34 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: 
atomicrmw_umax_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB62_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB62_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a0 +; RV32I-ZALRSC-NEXT: bgeu a3, a2, .LBB62_5 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: .LBB62_5: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB62_3 +; RV32I-ZALRSC-NEXT: # %bb.6: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB62_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: seqz a2, a0 +; RV32I-ZALRSC-NEXT: add a2, a0, a2 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -5347,6 +7859,35 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: add a2, a0, a2 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB62_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB62_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB62_5 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB62_5: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB62_3 +; RV64I-ZALRSC-NEXT: # %bb.6: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB62_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: seqz a2, a1 +; RV64I-ZALRSC-NEXT: add a2, a1, a2 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5434,6 +7975,38 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB63_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB63_5: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a0 +; RV32I-ZALRSC-NEXT: bgeu a2, a3, .LBB63_7 +; RV32I-ZALRSC-NEXT: # %bb.6: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: .LBB63_7: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB63_5 +; RV32I-ZALRSC-NEXT: # %bb.8: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB63_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: li a3, 1 +; 
RV32I-ZALRSC-NEXT: mv a2, a0 +; RV32I-ZALRSC-NEXT: bltu a0, a3, .LBB63_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %else +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB63_4: # %else +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -5501,6 +8074,38 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: .LBB63_4: # %else ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: mv a1, a0 +; RV64I-ZALRSC-NEXT: beqz a2, .LBB63_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB63_5: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a0, (a1) +; RV64I-ZALRSC-NEXT: mv a3, a0 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB63_7 +; RV64I-ZALRSC-NEXT: # %bb.6: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB63_7: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB63_5 +; RV64I-ZALRSC-NEXT: # %bb.8: # %then +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB63_2: # %else +; RV64I-ZALRSC-NEXT: lw a0, 0(a1) +; RV64I-ZALRSC-NEXT: li a3, 1 +; RV64I-ZALRSC-NEXT: mv a2, a0 +; RV64I-ZALRSC-NEXT: bltu a0, a3, .LBB63_4 +; RV64I-ZALRSC-NEXT: # %bb.3: # %else +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB63_4: # %else +; RV64I-ZALRSC-NEXT: sw a2, 0(a1) +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5570,6 +8175,25 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3 ; RV32IA-ZACAS-NEXT: lw a0, 0(a0) ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: mv a4, a0 +; RV32I-ZALRSC-NEXT: beqz a3, .LBB64_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: .LBB64_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a0, (a4) +; RV32I-ZALRSC-NEXT: bne a0, a1, .LBB64_5 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB64_3 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a2, (a4) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB64_3 +; RV32I-ZALRSC-NEXT: .LBB64_5: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB64_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a4) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a3, .LBB64_2 @@ -5620,6 +8244,26 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3 ; RV64IA-ZACAS-NEXT: .LBB64_2: # %else ; RV64IA-ZACAS-NEXT: lw a0, 0(a0) ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: beqz a3, .LBB64_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: .LBB64_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) +; RV64I-ZALRSC-NEXT: bne a3, a1, .LBB64_5 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB64_3 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a2, (a0) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB64_3 +; RV64I-ZALRSC-NEXT: .LBB64_5: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a3 +; 
RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB64_2: # %else +; RV64I-ZALRSC-NEXT: lw a3, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a3 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index 693a40d..5e5f2b7 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -217,6 +217,11 @@ ; CHECK-NEXT: xsfmm64t - 'XSfmm64t' (TE=64 configuration). ; CHECK-NEXT: xsfmmbase - 'XSfmmbase' (All non arithmetic instructions for all TEWs and sf.vtzero). ; CHECK-NEXT: xsfvcp - 'XSfvcp' (SiFive Custom Vector Coprocessor Interface Instructions). +; CHECK-NEXT: xsfvfbfexp16e - 'XSfvfbfexp16e' (SiFive Vector Floating-Point Exponential Function Instruction, BFloat16). +; CHECK-NEXT: xsfvfexp16e - 'XSfvfexp16e' (SiFive Vector Floating-Point Exponential Function Instruction, Half Precision). +; CHECK-NEXT: xsfvfexp32e - 'XSfvfexp32e' (SiFive Vector Floating-Point Exponential Function Instruction, Single Precision). +; CHECK-NEXT: xsfvfexpa - 'XSfvfexpa' (SiFive Vector Floating-Point Exponential Approximation Instruction). +; CHECK-NEXT: xsfvfexpa64e - 'XSfvfexpa64e' (SiFive Vector Floating-Point Exponential Approximation Instruction with Double-Precision). ; CHECK-NEXT: xsfvfnrclipxfqf - 'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions). ; CHECK-NEXT: xsfvfwmaccqqq - 'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction (4-by-4)). ; CHECK-NEXT: xsfvqmaccdod - 'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2)). diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll index cce1eda..1aee688 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll @@ -1,4 +1,5 @@ ; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} %"__cblayout_$Globals" = type <{ i32 }> @@ -9,7 +10,6 @@ ; CHECK: OpCapability Shader ; CHECK: OpCapability StorageTexelBufferArrayDynamicIndexingEXT - define void @main() local_unnamed_addr #0 { entry: %"$Globals.cb_h.i.i" = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 4, 0), 2, 0) @"llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_$Globalss_4_0t_2_0t"(i32 1, i32 0, i32 1, i32 0, ptr nonnull @"$Globals.str") @@ -19,4 +19,8 @@ entry: %2 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 98) store i32 99, ptr addrspace(11) %2, align 4 ret void -}
\ No newline at end of file +} + +!hlsl.cbs = !{!0} + +!0 = !{ptr @"$Globals.cb", ptr addrspace(12) @i} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll index da69a2f..163fc9d 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll @@ -1,4 +1,5 @@ ; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} %"__cblayout_$Globals" = type <{ i32 }> @@ -19,3 +20,7 @@ entry: store i32 98, ptr addrspace(11) %2, align 4 ret void } + +!hlsl.cbs = !{!0} + +!0 = !{ptr @"$Globals.cb", ptr addrspace(12) @i} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/TypedBufferLoad.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/TypedBufferLoad.ll new file mode 100644 index 0000000..7c44b6d --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/TypedBufferLoad.ll @@ -0,0 +1,43 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} + +; When accessing read-only `Buffer` types, SPIR-V should use `OpImageFetch` instead of `OpImageRead`. +; https://github.com/llvm/llvm-project/issues/162891 + +; CHECK-DAG: OpCapability SampledBuffer +; CHECK-DAG: OpCapability ImageBuffer +; CHECK-DAG: [[TypeInt:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[TypeImageBuffer:%[0-9]+]] = OpTypeImage [[TypeInt]] Buffer 2 0 0 1 Unknown +; CHECK-DAG: [[TypePtrImageBuffer:%[0-9]+]] = OpTypePointer UniformConstant [[TypeImageBuffer]] +; CHECK-DAG: [[TypeVector:%[0-9]+]] = OpTypeVector [[TypeInt]] 4 +; CHECK-DAG: [[Index:%[0-9]+]] = OpConstant [[TypeInt]] 98 +; CHECK-DAG: [[Variable:%[0-9]+]] = OpVariable [[TypePtrImageBuffer]] UniformConstant +@.str = private unnamed_addr constant [7 x i8] c"rwbuff\00", align 1 +@.str.2 = private unnamed_addr constant [5 x i8] c"buff\00", align 1 +@.str.4 = private unnamed_addr constant [8 x i8] c"unknown\00", align 1 + +define void @main() local_unnamed_addr #0 { + %1 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_2_33t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + %2 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_1_0t(i32 1, i32 0, i32 1, i32 0, ptr nonnull @.str.2) + %3 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 0, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_0_0t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @.str.4) + %4 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_1_0t(target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) %2, i32 98) +; CHECK: [[Load:%[0-9]+]] = OpLoad [[TypeImageBuffer]] [[Variable]] +; CHECK: [[ImageFetch:%[0-9]+]] = OpImageFetch [[TypeVector]] [[Load]] [[Index]] +; CHECK: {{.*}} = OpCompositeExtract [[TypeInt]] [[ImageFetch]] 0 + %5 = load i32, ptr addrspace(11) %4, align 4 + %6 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) 
%1, i32 99) + store i32 %5, ptr addrspace(11) %6, align 4 + %7 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 96) +; CHECK: {{%[0-9]+}} = OpLoad {{.*}} +; CHECK: {{%[0-9]+}} = OpImageRead {{.*}} + %8 = load i32, ptr addrspace(11) %7, align 4 + %9 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 97) + store i32 %8, ptr addrspace(11) %9, align 4 + %10 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_0_0t(target("spirv.Image", i32, 5, 2, 0, 0, 0, 0) %3, i32 94) +; CHECK: {{%[0-9]+}} = OpLoad {{.*}} +; CHECK: {{%[0-9]+}} = OpImageRead {{.*}} + %11 = load i32, ptr addrspace(11) %10, align 4 + %12 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 95) + store i32 %11, ptr addrspace(11) %12, align 4 + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll b/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll index 4a38d7a..c87f113 100644 --- a/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll @@ -1,7 +1,7 @@ ; Test that combined sin/cos library call is emitted when appropriate ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s --check-prefix=CHECK-OPT -; RUN: llc < %s -mtriple=s390x-linux-gnu -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-OPT +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s --check-prefix=CHECK-OPT define float @f1(float %x) { ; CHECK-OPT-LABEL: f1: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-vs-unpredicated-copy.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-vs-unpredicated-copy.mir new file mode 100644 index 0000000..5783133 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-vs-unpredicated-copy.mir @@ -0,0 +1,146 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s + +# From bug #162644. The _wrong_ output of this test is to generate the +# body of the tail-predicated loop like this: +# +# $q2 = MVE_VORR killed $q0, killed $q0, 0, $noreg, $noreg, undef $q2 +# renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, renamable $lr :: (load unknown-size from %ir.13, align 4) +# $q0 = MVE_VORR $q1, $q1, 0, $noreg, $noreg, undef $q0 +# renamable $q0 = MVE_VADDf32 killed renamable $q2, killed renamable $q3, 0, killed $noreg, renamable $lr, killed renamable $q0 +# $lr = MVE_LETP killed renamable $lr, %bb.1 +# +# in which the second MVE_VORR, copying q1 into q0, is an invalid conversion of +# the input MQPRCopy, because it won't copy the vector lanes disabled by +# FPSCR.LTPSIZE, and those are needed in the output value of the loop. +# +# In the right output, that MQPRCopy is expanded into a pair of VMOVD copying +# d2,d3 into d0,d1 respectively, which are unaffected by LTPSIZE. 
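+#
+# For reference, the correct expansion (it is the form checked in the CHECK
+# lines below; nothing here is assumed beyond what the test itself verifies)
+# copies the full 128-bit value as two 64-bit d-register moves, which are not
+# subject to FPSCR.LTPSIZE lane masking:
+#
+#   $d0 = VMOVD $d2, 14 /* CC::al */, $noreg
+#   $d1 = VMOVD $d3, 14 /* CC::al */, $noreg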
+ +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-unknown-none-eabihf" + + @inactive = dso_local local_unnamed_addr global <4 x float> zeroinitializer, align 16 + + define <4 x float> @test_func(ptr %0, i32 %1) { + %3 = load <4 x float>, ptr @inactive, align 16 + %4 = add i32 %1, 3 + %5 = call i32 @llvm.smin.i32(i32 %1, i32 4) + %6 = sub i32 %4, %5 + %7 = lshr i32 %6, 2 + %8 = add nuw nsw i32 %7, 1 + %9 = call i32 @llvm.start.loop.iterations.i32(i32 %8) + br label %10 + + 10: ; preds = %10, %2 + %11 = phi <4 x float> [ splat (float 0x3FB99999A0000000), %2 ], [ %17, %10 ] + %12 = phi i32 [ %1, %2 ], [ %19, %10 ] + %13 = phi ptr [ %0, %2 ], [ %18, %10 ] + %14 = phi i32 [ %9, %2 ], [ %20, %10 ] + %15 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %12) + %16 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %13, i32 4, <4 x i1> %15, <4 x float> zeroinitializer) + %17 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %11, <4 x float> %16, <4 x i1> %15, <4 x float> %3) + %18 = getelementptr inbounds nuw i8, ptr %13, i32 16 + %19 = add i32 %12, -4 + %20 = call i32 @llvm.loop.decrement.reg.i32(i32 %14, i32 1) + %21 = icmp ne i32 %20, 0 + br i1 %21, label %10, label %22 + + 22: ; preds = %10 + ret <4 x float> %17 + } +... +--- +name: test_func +alignment: 4 +legalized: false +tracksRegLiveness: true +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + ; CHECK-LABEL: name: test_func + ; CHECK: bb.0 (%ir-block.2): + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $lr, $r0, $r1, $r7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK-NEXT: $r2 = t2MOVi16 target-flags(arm-lo16) @inactive, 14 /* CC::al */, $noreg + ; CHECK-NEXT: $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @inactive, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $q1 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg, $noreg :: (dereferenceable load (s128) from @inactive) + ; CHECK-NEXT: $r3 = t2MOVi16 52429, 14 /* CC::al */, $noreg + ; CHECK-NEXT: $r3 = t2MOVTi16 killed $r3, 15820, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $q0 = MVE_VDUP32 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0 + ; CHECK-NEXT: $lr = MVE_DLSTP_32 killed renamable $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1 (%ir-block.10, align 4): + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: liveins: $lr, $d2, $d3, $q0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $q2 = MVE_VORR killed $q0, killed $q0, 0, $noreg, $noreg, undef $q2 + ; CHECK-NEXT: renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, renamable $lr :: (load unknown-size from %ir.13, align 4) + ; 
CHECK-NEXT: $d0 = VMOVD $d2, 14 /* CC::al */, $noreg + ; CHECK-NEXT: $d1 = VMOVD $d3, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $q0 = MVE_VADDf32 killed renamable $q2, killed renamable $q3, 0, killed $noreg, renamable $lr, killed renamable $q0 + ; CHECK-NEXT: $lr = MVE_LETP killed renamable $lr, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2 (%ir-block.22): + ; CHECK-NEXT: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit killed $q0 + bb.0 (%ir-block.2): + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r7, $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + $r2 = t2MOVi16 target-flags(arm-lo16) @inactive, 14 /* CC::al */, $noreg + tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr + $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @inactive, 14 /* CC::al */, $noreg + renamable $r3 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + renamable $q1 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg, $noreg :: (dereferenceable load (s128) from @inactive) + $r2 = tMOVr $r1, 14 /* CC::al */, $noreg + t2IT 10, 8, implicit-def $itstate + renamable $r2 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r2, implicit killed $itstate + renamable $r2, dead $cpsr = tSUBrr renamable $r1, killed renamable $r2, 14 /* CC::al */, $noreg + renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 3, 14 /* CC::al */, $noreg + renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg + $r3 = t2MOVi16 52429, 14 /* CC::al */, $noreg + $r3 = t2MOVTi16 killed $r3, 15820, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VDUP32 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0 + renamable $lr = t2DoLoopStartTP killed renamable $r2, renamable $r1 + + bb.1 (%ir-block.10, align 4): + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $q0, $q1, $r0, $r1 + + renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg, $noreg + $q2 = MQPRCopy killed $q0 + MVE_VPST 8, implicit $vpr + renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, renamable $lr :: (load unknown-size from %ir.13, align 4) + $q0 = MQPRCopy $q1 + MVE_VPST 8, implicit $vpr + renamable $q0 = MVE_VADDf32 killed renamable $q2, killed renamable $q3, 1, killed renamable $vpr, renamable $lr, killed renamable $q0 + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + renamable $lr = t2LoopEndDec killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2 (%ir-block.22): + liveins: $q0 + + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit killed $q0 +... 
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll index 053d6a1..d741411 100644 --- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll @@ -94,5 +94,5 @@ attributes #1 = { minsize nofree norecurse nounwind optsize } !llvm.module.flags = !{!0, !1, !2} !0 = !{i32 8, !"branch-target-enforcement", i32 0} -!1 = !{i32 8, !"sign-return-address", i32 1} +!1 = !{i32 8, !"sign-return-address", i32 2} !2 = !{i32 8, !"sign-return-address-all", i32 0} diff --git a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll index 94efe0f..104ec31 100644 --- a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll +++ b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll @@ -5,6 +5,7 @@ target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20 %struct.TwoInts = type { i32, i32 } %struct.ThreeInts = type { i32, i32, i32 } %struct.FourInts = type { i32, i32, i32, i32 } +%struct.TwoShorts = type { i16, i16 } %struct.ThreeShorts = type { i16, i16, i16 } %struct.FourShorts = type { i16, i16, i16, i16 } %struct.FiveShorts = type { i16, i16, i16, i16, i16 } @@ -12,6 +13,8 @@ target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20 %struct.ThreeBytes = type { i8, i8, i8 } %struct.FourBytes = type { i8, i8, i8, i8 } %struct.EightBytes = type { i8, i8, i8, i8, i8, i8, i8, i8 } +%struct.TwoFloats = type { float, float } +%struct.FourFloats = type { float, float, float, float } ; CHECK-LABEL: two_ints_same_op: ; CHECK: loop @@ -1536,3 +1539,1608 @@ define hidden void @scale_uv_row_down2_linear(ptr nocapture noundef readonly %0, 34: ; preds = %6, %4 ret void } + +; CHECK-LABEL: two_floats_same_op: +; CHECK-NOT: f32x4.mul +define hidden void @two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp21.not = icmp eq i32 %N, 0 + br i1 %cmp21.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.022 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.022 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.022 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %arrayidx3 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.022 + store float %mul, ptr %arrayidx3, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %mul8 = fmul float %2, %3 + %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4 + store float %mul8, ptr %y10, align 4 + %inc = add nuw i32 %i.022, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_floats_vary_op: +; CHECK-NOT: f32x4 +define hidden void @two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp20.not = icmp eq i32 %N, 0 + br i1 %cmp20.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body 
+ %i.021 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.021 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.021 + %1 = load float, ptr %arrayidx1, align 4 + %add = fadd float %0, %1 + %arrayidx3 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.021 + store float %add, ptr %arrayidx3, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %sub = fsub float %2, %3 + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4 + store float %sub, ptr %y9, align 4 + %inc = add nuw i32 %i.021, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_bytes_two_floats_same_op: +; CHECK: loop +; CHECK: v128.load64_zero +; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: v128.load64_zero +; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: v128.store +define hidden void @two_bytes_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp24.not = icmp eq i32 %N, 0 + br i1 %cmp24.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.025 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoBytes, ptr %a, i32 %i.025 + %0 = load i8, ptr %arrayidx, align 1 + %conv = sitofp i8 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.TwoBytes, ptr %b, i32 %i.025 + %1 = load i8, ptr %arrayidx1, align 1 + %conv3 = sitofp i8 %1 to float + %mul = fmul float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.025 + store float %mul, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1 + %2 = load i8, ptr %y, align 1 + %conv7 = sitofp i8 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1 + %3 = load i8, ptr %y9, align 1 + %conv10 = sitofp i8 %3 to float + %mul11 = fmul float %conv7, %conv10 + %y13 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %mul11, ptr %y13, align 4 + %inc = add nuw i32 %i.025, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_bytes_two_floats_vary_op: +; CHECK: v128.load64_zero +; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 
6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: v128.load64_zero +; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.add +; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.sub +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: v128.store +define hidden void @two_bytes_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp23.not = icmp eq i32 %N, 0 + br i1 %cmp23.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.024 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoBytes, ptr %a, i32 %i.024 + %0 = load i8, ptr %arrayidx, align 1 + %conv = sitofp i8 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.TwoBytes, ptr %b, i32 %i.024 + %1 = load i8, ptr %arrayidx1, align 1 + %conv3 = sitofp i8 %1 to float + %add = fadd float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.024 + store float %add, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1 + %2 = load i8, ptr %y, align 1 + %conv7 = sitofp i8 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1 + %3 = load i8, ptr %y9, align 1 + %conv10 = sitofp i8 %3 to float + %sub = fsub float %conv7, %conv10 + %y12 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %sub, ptr %y12, align 4 + %inc = add nuw i32 %i.024, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_floats_two_bytes_same_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.splat +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; 
CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: v128.store64_lane +define hidden void @two_floats_two_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp22.not = icmp eq i32 %N, 0 + br i1 %cmp22.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.023 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.023 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %conv = fptosi float %mul to i8 + %arrayidx3 = getelementptr inbounds nuw %struct.TwoBytes, ptr %res, i32 %i.023 + store i8 %conv, ptr %arrayidx3, align 1 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %mul8 = fmul float %2, %3 + %conv9 = fptosi float %mul8 to i8 + %y11 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 1 + store i8 %conv9, ptr %y11, align 1 + %inc = add nuw i32 %i.023, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_floats_two_bytes_vary_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: f32x4.add +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.splat +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: f32x4.sub +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: v128.store64_lane +define hidden void @two_floats_two_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp21.not = icmp eq i32 %N, 0 + br i1 %cmp21.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.022 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.022 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.022 + %1 = load float, ptr %arrayidx1, align 4 + %add = fadd float %0, %1 + %conv = fptosi float %add to i8 + %arrayidx3 = getelementptr inbounds nuw 
%struct.TwoBytes, ptr %res, i32 %i.022 + store i8 %conv, ptr %arrayidx3, align 1 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %sub = fsub float %2, %3 + %conv8 = fptosi float %sub to i8 + %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 1 + store i8 %conv8, ptr %y10, align 1 + %inc = add nuw i32 %i.022, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_shorts_two_floats_same_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: v128.store +define hidden void @two_shorts_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp24.not = icmp eq i32 %N, 0 + br i1 %cmp24.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.025 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoShorts, ptr %a, i32 %i.025 + %0 = load i16, ptr %arrayidx, align 2 + %conv = sitofp i16 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.TwoShorts, ptr %b, i32 %i.025 + %1 = load i16, ptr %arrayidx1, align 2 + %conv3 = sitofp i16 %1 to float + %mul = fmul float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.025 + store float %mul, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 + %2 = load i16, ptr %y, align 2 + %conv7 = sitofp i16 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2 + %3 = load i16, ptr %y9, align 2 + %conv10 = sitofp i16 %3 to float + %mul11 = fmul float %conv7, %conv10 + %y13 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %mul11, ptr %y13, align 4 + %inc = add nuw i32 %i.025, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_shorts_two_floats_vary_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.add +; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: 
i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.sub +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: v128.store +define hidden void @two_shorts_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp23.not = icmp eq i32 %N, 0 + br i1 %cmp23.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.024 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoShorts, ptr %a, i32 %i.024 + %0 = load i16, ptr %arrayidx, align 2 + %conv = sitofp i16 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.TwoShorts, ptr %b, i32 %i.024 + %1 = load i16, ptr %arrayidx1, align 2 + %conv3 = sitofp i16 %1 to float + %add = fadd float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.024 + store float %add, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 + %2 = load i16, ptr %y, align 2 + %conv7 = sitofp i16 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2 + %3 = load i16, ptr %y9, align 2 + %conv10 = sitofp i16 %3 to float + %sub = fsub float %conv7, %conv10 + %y12 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %sub, ptr %y12, align 4 + %inc = add nuw i32 %i.024, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_floats_two_shorts_same_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.splat +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: v128.store +define hidden void @two_floats_two_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp22.not = icmp eq i32 %N, 0 + br i1 %cmp22.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = 
getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.023 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.023 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %conv = fptosi float %mul to i16 + %arrayidx3 = getelementptr inbounds nuw %struct.TwoShorts, ptr %res, i32 %i.023 + store i16 %conv, ptr %arrayidx3, align 2 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %mul8 = fmul float %2, %3 + %conv9 = fptosi float %mul8 to i16 + %y11 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 2 + store i16 %conv9, ptr %y11, align 2 + %inc = add nuw i32 %i.023, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_floats_two_shorts_vary_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: f32x4.add +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.splat +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: f32x4.sub +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: v128.store +define hidden void @two_floats_two_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp21.not = icmp eq i32 %N, 0 + br i1 %cmp21.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.022 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.022 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.022 + %1 = load float, ptr %arrayidx1, align 4 + %add = fadd float %0, %1 + %conv = fptosi float %add to i16 + %arrayidx3 = getelementptr inbounds nuw %struct.TwoShorts, ptr %res, i32 %i.022 + store i16 %conv, ptr %arrayidx3, align 2 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %sub = fsub float %2, %3 + %conv8 = fptosi float %sub to i16 + %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 2 + store i16 %conv8, ptr %y10, align 2 + %inc = add nuw i32 %i.022, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; 
CHECK-LABEL: four_floats_same_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: v128.load +; CHECK: f32x4.mul +; CHECK: v128.store +define hidden void @four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp45.not = icmp eq i32 %N, 0 + br i1 %cmp45.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.046 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourFloats, ptr %a, i32 %i.046 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.FourFloats, ptr %b, i32 %i.046 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %arrayidx3 = getelementptr inbounds nuw %struct.FourFloats, ptr %res, i32 %i.046 + store float %mul, ptr %arrayidx3, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %mul8 = fmul float %2, %3 + %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4 + store float %mul8, ptr %y10, align 4 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 8 + %4 = load float, ptr %z, align 4 + %z13 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 8 + %5 = load float, ptr %z13, align 4 + %mul14 = fmul float %4, %5 + %z16 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 8 + store float %mul14, ptr %z16, align 4 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 12 + %6 = load float, ptr %w, align 4 + %w19 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 12 + %7 = load float, ptr %w19, align 4 + %mul20 = fmul float %6, %7 + %w22 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 12 + store float %mul20, ptr %w22, align 4 + %inc = add nuw i32 %i.046, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_floats_vary_op: +; CHECK-NOT: f32x4 +define hidden void @four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp42.not = icmp eq i32 %N, 0 + br i1 %cmp42.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.043 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourFloats, ptr %a, i32 %i.043 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.FourFloats, ptr %b, i32 %i.043 + %1 = load float, ptr %arrayidx1, align 4 + %add = fadd float %0, %1 + %arrayidx3 = getelementptr inbounds nuw %struct.FourFloats, ptr %res, i32 %i.043 + store float %add, ptr %arrayidx3, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %sub = fsub float %2, %3 + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4 + store float %sub, ptr %y9, align 4 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 8 + %4 = load float, ptr %z, align 4 + %z12 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 8 + %5 = load float, ptr %z12, align 4 + %mul = fmul float %4, %5 + %z14 = getelementptr inbounds nuw i8, ptr 
%arrayidx3, i32 8 + store float %mul, ptr %z14, align 4 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 12 + %6 = load float, ptr %w, align 4 + %w17 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 12 + %7 = load float, ptr %w17, align 4 + %div = fdiv float %6, %7 + %w19 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 12 + store float %div, ptr %w19, align 4 + %inc = add nuw i32 %i.043, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_bytes_four_floats_same_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +define hidden void @four_bytes_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp52.not = icmp eq i32 %N, 0 + br i1 %cmp52.not, label %for.cond.cleanup, label %for.body + 
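+; The scalar per-field operations in the loop below are expected to vectorize into the v128.load / i8x16.shuffle / i16x8.extend_low_i8x16_s / f32x4.convert_i32x4_s / f32x4.mul sequence checked above.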
+for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.053 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourBytes, ptr %a, i32 %i.053 + %0 = load i8, ptr %arrayidx, align 1 + %conv = sitofp i8 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.FourBytes, ptr %b, i32 %i.053 + %1 = load i8, ptr %arrayidx1, align 1 + %conv3 = sitofp i8 %1 to float + %mul = fmul float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.FourFloats, ptr %res, i32 %i.053 + store float %mul, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1 + %2 = load i8, ptr %y, align 1 + %conv7 = sitofp i8 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1 + %3 = load i8, ptr %y9, align 1 + %conv10 = sitofp i8 %3 to float + %mul11 = fmul float %conv7, %conv10 + %y13 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %mul11, ptr %y13, align 4 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 + %4 = load i8, ptr %z, align 1 + %conv15 = sitofp i8 %4 to float + %z17 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2 + %5 = load i8, ptr %z17, align 1 + %conv18 = sitofp i8 %5 to float + %mul19 = fmul float %conv15, %conv18 + %z21 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 8 + store float %mul19, ptr %z21, align 4 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 3 + %6 = load i8, ptr %w, align 1 + %conv23 = sitofp i8 %6 to float + %w25 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 3 + %7 = load i8, ptr %w25, align 1 + %conv26 = sitofp i8 %7 to float + %mul27 = fmul float %conv23, %conv26 + %w29 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 12 + store float %mul27, ptr %w29, align 4 + %inc = add nuw i32 %i.053, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_bytes_four_floats_vary_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.add +; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.div +; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: 
i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.sub +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +define hidden void @four_bytes_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp49.not = icmp eq i32 %N, 0 + br i1 %cmp49.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.050 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourBytes, ptr %a, i32 %i.050 + %0 = load i8, ptr %arrayidx, align 1 + %conv = sitofp i8 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.FourBytes, ptr %b, i32 %i.050 + %1 = load i8, ptr %arrayidx1, align 1 + %conv3 = sitofp i8 %1 to float + %mul = fmul float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.FourFloats, ptr %res, i32 %i.050 + store float %mul, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1 + %2 = load i8, ptr %y, align 1 + %conv7 = sitofp i8 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1 + %3 = load i8, ptr %y9, align 1 + %conv10 = sitofp i8 %3 to float + %add = fadd float %conv7, %conv10 + %y12 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %add, ptr %y12, align 4 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 + %4 = load i8, ptr %z, align 1 + %conv14 = sitofp i8 %4 to float + %z16 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2 + %5 = load i8, ptr %z16, align 1 + %conv17 = sitofp i8 %5 to float + %div = fdiv float %conv14, %conv17 + %z19 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 8 + store float %div, ptr %z19, align 4 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 3 + %6 = load i8, ptr %w, align 1 + %conv21 = sitofp i8 %6 to float + %w23 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 3 + %7 = load i8, ptr %w23, align 1 + %conv24 = sitofp i8 %7 to float + %sub = fsub float %conv21, %conv24 + %w26 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 12 + store float %sub, ptr %w26, align 4 + %inc = add nuw i32 %i.050, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_floats_four_bytes_same_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: v128.load 
+; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.splat +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: v128.store +define 
hidden void @four_floats_four_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp48.not = icmp eq i32 %N, 0 + br i1 %cmp48.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.049 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourFloats, ptr %a, i32 %i.049 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.FourFloats, ptr %b, i32 %i.049 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %conv = fptosi float %mul to i8 + %arrayidx3 = getelementptr inbounds nuw %struct.FourBytes, ptr %res, i32 %i.049 + store i8 %conv, ptr %arrayidx3, align 1 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %mul8 = fmul float %2, %3 + %conv9 = fptosi float %mul8 to i8 + %y11 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 1 + store i8 %conv9, ptr %y11, align 1 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 8 + %4 = load float, ptr %z, align 4 + %z14 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 8 + %5 = load float, ptr %z14, align 4 + %mul15 = fmul float %4, %5 + %conv16 = fptosi float %mul15 to i8 + %z18 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 2 + store i8 %conv16, ptr %z18, align 1 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 12 + %6 = load float, ptr %w, align 4 + %w21 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 12 + %7 = load float, ptr %w21, align 4 + %mul22 = fmul float %6, %7 + %conv23 = fptosi float %mul22 to i8 + %w25 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 3 + store i8 %conv23, ptr %w25, align 1 + %inc = add nuw i32 %i.049, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_floats_four_bytes_vary_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.splat +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.add +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: i8x16.shuffle {{.*}} 8, 
9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.div +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.sub +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i8x16.replace_lane +; CHECK: v128.store +define hidden void @four_floats_four_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp45.not = icmp eq i32 %N, 0 + br i1 %cmp45.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.046 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourFloats, ptr %a, i32 %i.046 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.FourFloats, ptr %b, i32 %i.046 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %conv = fptosi float %mul to i8 + %arrayidx3 = getelementptr inbounds nuw %struct.FourBytes, ptr %res, i32 %i.046 + store i8 %conv, ptr %arrayidx3, align 1 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %add = fadd float %2, %3 + %conv8 = fptosi float %add to i8 + %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 1 + store i8 %conv8, ptr %y10, align 1 + %z = getelementptr inbounds nuw i8, 
ptr %arrayidx, i32 8 + %4 = load float, ptr %z, align 4 + %z13 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 8 + %5 = load float, ptr %z13, align 4 + %div = fdiv float %4, %5 + %conv14 = fptosi float %div to i8 + %z16 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 2 + store i8 %conv14, ptr %z16, align 1 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 12 + %6 = load float, ptr %w, align 4 + %w19 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 12 + %7 = load float, ptr %w19, align 4 + %sub = fsub float %6, %7 + %conv20 = fptosi float %sub to i8 + %w22 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 3 + store i8 %conv20, ptr %w22, align 1 + %inc = add nuw i32 %i.046, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_shorts_four_floats_same_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +define hidden void @four_shorts_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp52.not 
= icmp eq i32 %N, 0 + br i1 %cmp52.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.053 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourShorts, ptr %a, i32 %i.053 + %0 = load i16, ptr %arrayidx, align 2 + %conv = sitofp i16 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.FourShorts, ptr %b, i32 %i.053 + %1 = load i16, ptr %arrayidx1, align 2 + %conv3 = sitofp i16 %1 to float + %mul = fmul float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.FourFloats, ptr %res, i32 %i.053 + store float %mul, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 + %2 = load i16, ptr %y, align 2 + %conv7 = sitofp i16 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2 + %3 = load i16, ptr %y9, align 2 + %conv10 = sitofp i16 %3 to float + %mul11 = fmul float %conv7, %conv10 + %y13 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %mul11, ptr %y13, align 4 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %4 = load i16, ptr %z, align 2 + %conv15 = sitofp i16 %4 to float + %z17 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %5 = load i16, ptr %z17, align 2 + %conv18 = sitofp i16 %5 to float + %mul19 = fmul float %conv15, %conv18 + %z21 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 8 + store float %mul19, ptr %z21, align 4 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 6 + %6 = load i16, ptr %w, align 2 + %conv23 = sitofp i16 %6 to float + %w25 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 6 + %7 = load i16, ptr %w25, align 2 + %conv26 = sitofp i16 %7 to float + %mul27 = fmul float %conv23, %conv26 + %w29 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 12 + store float %mul27, ptr %w29, align 4 + %inc = add nuw i32 %i.053, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_shorts_four_floats_vary_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.mul +; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.add +; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: f32x4.div +; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: f32x4.convert_i32x4_s +; CHECK: 
f32x4.sub +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.store +define hidden void @four_shorts_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp49.not = icmp eq i32 %N, 0 + br i1 %cmp49.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.050 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourShorts, ptr %a, i32 %i.050 + %0 = load i16, ptr %arrayidx, align 2 + %conv = sitofp i16 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.FourShorts, ptr %b, i32 %i.050 + %1 = load i16, ptr %arrayidx1, align 2 + %conv3 = sitofp i16 %1 to float + %mul = fmul float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.FourFloats, ptr %res, i32 %i.050 + store float %mul, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 + %2 = load i16, ptr %y, align 2 + %conv7 = sitofp i16 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2 + %3 = load i16, ptr %y9, align 2 + %conv10 = sitofp i16 %3 to float + %add = fadd float %conv7, %conv10 + %y12 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %add, ptr %y12, align 4 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %4 = load i16, ptr %z, align 2 + %conv14 = sitofp i16 %4 to float + %z16 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %5 = load i16, ptr %z16, align 2 + %conv17 = sitofp i16 %5 to float + %div = fdiv float %conv14, %conv17 + %z19 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 8 + store float %div, ptr %z19, align 4 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 6 + %6 = load i16, ptr %w, align 2 + %conv21 = sitofp i16 %6 to float + %w23 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 6 + %7 = load i16, ptr %w23, align 2 + %conv24 = sitofp i16 %7 to float + %sub = fsub float %conv21, %conv24 + %w26 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 12 + store float %sub, ptr %w26, align 4 + %inc = add nuw i32 %i.050, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_floats_four_shorts_same_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: v128.load +; CHECK: v128.load +; 
CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.splat +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: v128.store +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.splat +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: v128.store +define hidden void @four_floats_four_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp48.not = icmp eq i32 %N, 0 + br i1 %cmp48.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + 
ret void + +for.body: ; preds = %entry, %for.body + %i.049 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourFloats, ptr %a, i32 %i.049 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.FourFloats, ptr %b, i32 %i.049 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %conv = fptosi float %mul to i16 + %arrayidx3 = getelementptr inbounds nuw %struct.FourShorts, ptr %res, i32 %i.049 + store i16 %conv, ptr %arrayidx3, align 2 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %mul8 = fmul float %2, %3 + %conv9 = fptosi float %mul8 to i16 + %y11 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 2 + store i16 %conv9, ptr %y11, align 2 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 8 + %4 = load float, ptr %z, align 4 + %z14 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 8 + %5 = load float, ptr %z14, align 4 + %mul15 = fmul float %4, %5 + %conv16 = fptosi float %mul15 to i16 + %z18 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4 + store i16 %conv16, ptr %z18, align 2 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 12 + %6 = load float, ptr %w, align 4 + %w21 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 12 + %7 = load float, ptr %w21, align 4 + %mul22 = fmul float %6, %7 + %conv23 = fptosi float %mul22 to i16 + %w25 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 6 + store i16 %conv23, ptr %w25, align 2 + %inc = add nuw i32 %i.049, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_floats_four_shorts_vary_op: +; CHECK: loop +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: v128.load +; CHECK: v128.load +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.mul +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.splat +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.add +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 
{{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.div +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: f32x4.sub +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: v128.store +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.splat +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: f32x4.extract_lane +; CHECK: i32.trunc_sat_f32_s +; CHECK: i16x8.replace_lane +; CHECK: v128.store +define hidden void @four_floats_four_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp45.not = icmp eq i32 %N, 0 + br i1 %cmp45.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.046 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourFloats, ptr %a, i32 %i.046 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.FourFloats, ptr %b, i32 %i.046 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %conv = fptosi float %mul to i16 + %arrayidx3 = getelementptr inbounds nuw %struct.FourShorts, ptr %res, i32 %i.046 + store i16 %conv, ptr %arrayidx3, align 2 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %add = fadd float %2, %3 + %conv8 = fptosi float %add to i16 + %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 2 + store i16 %conv8, ptr %y10, align 2 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 8 + %4 = load float, ptr %z, align 4 + %z13 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 8 + %5 = load float, ptr %z13, align 4 + %div = fdiv float %4, %5 + %conv14 = fptosi float %div to i16 + %z16 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4 + store i16 %conv14, ptr 
%z16, align 2 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 12 + %6 = load float, ptr %w, align 4 + %w19 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 12 + %7 = load float, ptr %w19, align 4 + %sub = fsub float %6, %7 + %conv20 = fptosi float %sub to i16 + %w22 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 6 + store i16 %conv20, ptr %w22, align 2 + %inc = add nuw i32 %i.046, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmax.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmax.ll new file mode 100644 index 0000000..45f4ddd --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmax.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 + +; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mattr=+simd128,+relaxed-simd | FileCheck %s + +; Test that fmaxnum and fmaximumnum get transformed to relaxed_max + +target triple = "wasm32" + +define <4 x float> @test_maxnum_f32x4(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_maxnum_f32x4: +; CHECK: .functype test_maxnum_f32x4 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f32x4.relaxed_max +; CHECK-NEXT: # fallthrough-return + %result = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %result +} + +define <4 x float> @test_maximumnum_f32x4(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_maximumnum_f32x4: +; CHECK: .functype test_maximumnum_f32x4 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f32x4.relaxed_max +; CHECK-NEXT: # fallthrough-return + %result = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %result +} + +define <2 x double> @test_maxnum_f64x2(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: test_maxnum_f64x2: +; CHECK: .functype test_maxnum_f64x2 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f64x2.relaxed_max +; CHECK-NEXT: # fallthrough-return + %result = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %result +} + +define <2 x double> @test_maximumnum_f64x2(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: test_maximumnum_f64x2: +; CHECK: .functype test_maximumnum_f64x2 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f64x2.relaxed_max +; CHECK-NEXT: # fallthrough-return + %result = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %result +} + +declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) +declare <4 x float> @llvm.maximumnum.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) +declare <2 x double> @llvm.maximumnum.v2f64(<2 x double>, <2 x double>)
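Both max flavors above funnel into the same relaxed_max instruction even though the two intrinsics differ at the edges. As a reference point, a minimal scalar sketch of the difference (illustrative only, not part of the committed test): llvm.maxnum follows IEEE-754 maxNum, so maxnum(-0.0, +0.0) may return either zero, while llvm.maximumnum orders -0.0 below +0.0; both return the other operand when one input is NaN. The wasm relaxed_max instruction leaves exactly those NaN and signed-zero cases implementation-defined, which is presumably what makes a single lowering acceptable once +relaxed-simd is opted into.

define float @scalar_maxnum(float %a, float %b) {
  ; NaN inputs are dropped in favor of the other operand; the sign of a
  ; zero result is unspecified when comparing -0.0 against +0.0.
  %r = call float @llvm.maxnum.f32(float %a, float %b)
  ret float %r
}

define float @scalar_maximumnum(float %a, float %b) {
  ; Same NaN handling, but -0.0 is treated as less than +0.0.
  %r = call float @llvm.maximumnum.f32(float %a, float %b)
  ret float %r
}

The next file exercises the matching min forms the same way.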
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmin.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmin.ll new file mode 100644 index 0000000..f3eec02 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmin.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mattr=+simd128,+relaxed-simd | FileCheck %s + +; Test that fminnum and fminimumnum get transformed to relaxed_min + +target triple = "wasm32" + +define <4 x float> @test_minnum_f32x4(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_minnum_f32x4: +; CHECK: .functype test_minnum_f32x4 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f32x4.relaxed_min +; CHECK-NEXT: # fallthrough-return + %result = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %result +} + +define <4 x float> @test_minimumnum_f32x4(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_minimumnum_f32x4: +; CHECK: .functype test_minimumnum_f32x4 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f32x4.relaxed_min +; CHECK-NEXT: # fallthrough-return + %result = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %result +} + +define <2 x double> @test_minnum_f64x2(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: test_minnum_f64x2: +; CHECK: .functype test_minnum_f64x2 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f64x2.relaxed_min +; CHECK-NEXT: # fallthrough-return + %result = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %result +} + +define <2 x double> @test_minimumnum_f64x2(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: test_minimumnum_f64x2: +; CHECK: .functype test_minimumnum_f64x2 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f64x2.relaxed_min +; CHECK-NEXT: # fallthrough-return + %result = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %result +} + +declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) +declare <4 x float> @llvm.minimumnum.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) +declare <2 x double> @llvm.minimumnum.v2f64(<2 x double>, <2 x double>) diff --git a/llvm/test/CodeGen/WebAssembly/simd-vector-trunc.ll b/llvm/test/CodeGen/WebAssembly/simd-vector-trunc.ll index 123438d..f58456b 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-vector-trunc.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-vector-trunc.ll @@ -94,6 +94,19 @@ entry: ret <16 x i8> %0 } +define <8 x i8> @trunc8i16_8i8(<8 x i16> %a) { +; CHECK-LABEL: trunc8i16_8i8: +; CHECK: .functype trunc8i16_8i8 (v128) -> (v128) +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i8x16.shuffle 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: # fallthrough-return +entry: + %0 = trunc <8 x i16> %a to <8 x i8> + ret <8 x i8> %0 +} + define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) { ; CHECK-LABEL: trunc8i64_8i16: ; CHECK: .functype trunc8i64_8i16 (v128, v128, v128, v128) -> (v128) @@ -139,3 +152,29 @@ entry: %0 = trunc <8 x i32> %a to <8 x i16> ret <8 x i16> %0 } + +define <4 x i16> @trunc4i32_4i16(<4 x i32> %a) { +; CHECK-LABEL: trunc4i32_4i16: +; CHECK: .functype trunc4i32_4i16 (v128) -> (v128) +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK-NEXT: # fallthrough-return +entry: + %0 = trunc <4 x i32> %a to <4 x i16> + ret <4 x i16> %0 +} + +define <4 x i8> @trunc4i32_4i8(<4 x i32> %a) { +; CHECK-LABEL: trunc4i32_4i8: +;
CHECK: .functype trunc4i32_4i8 (v128) -> (v128) +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i8x16.shuffle 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: # fallthrough-return +entry: + %0 = trunc <4 x i32> %a to <4 x i8> + ret <4 x i8> %0 +}
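The shuffle masks checked in this file encode the truncation directly: on little-endian wasm, narrowing a lane is just keeping its low-order bytes, and the upper eight output lanes are don't-care padding. A hand-written equivalent of the <4 x i32> to <4 x i16> case, as a sketch (the function name is made up for illustration; it is not part of the test file):

define <4 x i16> @trunc4i32_4i16_by_hand(<4 x i32> %v) {
  ; Reinterpret the vector as 16 bytes, then keep bytes 0,1 of each
  ; 4-byte lane -- the same indices the i8x16.shuffle above selects.
  %bytes = bitcast <4 x i32> %v to <16 x i8>
  %lo = shufflevector <16 x i8> %bytes, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
  %res = bitcast <8 x i8> %lo to <4 x i16>
  ret <4 x i16> %res
}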
diff --git a/llvm/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/llvm/test/CodeGen/X86/2006-05-22-FPSetEQ.ll index bea11e9..940fe8c 100644 --- a/llvm/test/CodeGen/X86/2006-05-22-FPSetEQ.ll +++ b/llvm/test/CodeGen/X86/2006-05-22-FPSetEQ.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=i686-- -mattr=-sse | FileCheck %s -check-prefix=WITHNANS -; RUN: llc < %s -mtriple=i686-- -mattr=-sse -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s -check-prefix=NONANS +; RUN: llc < %s -mtriple=i686-- -mattr=-sse -enable-no-nans-fp-math | FileCheck %s -check-prefix=NONANS ; WITHNANS-LABEL: test: ; WITHNANS: setnp diff --git a/llvm/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll b/llvm/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll index 8411a40..ff7a99a 100644 --- a/llvm/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll +++ b/llvm/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -enable-unsafe-fp-math -mtriple=i686-- | FileCheck %s +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; rdar://5902801 declare void @test2() diff --git a/llvm/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll b/llvm/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll index 6ebbb2e..0e0e20f 100644 --- a/llvm/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll +++ b/llvm/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -enable-unsafe-fp-math +; RUN: llc < %s ; <rdar://problem/12180135> target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" target triple = "i386-apple-macosx10.8.0" diff --git a/llvm/test/CodeGen/X86/avx-minmax.ll b/llvm/test/CodeGen/X86/avx-minmax.ll index 6da04c5..8e4b6c6 100644 --- a/llvm/test/CodeGen/X86/avx-minmax.ll +++ b/llvm/test/CodeGen/X86/avx-minmax.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -enable-no-nans-fp-math | FileCheck %s define <2 x double> @maxpd(<2 x double> %x, <2 x double> %y) { ; CHECK-LABEL: maxpd: diff --git a/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll b/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll index f827998..eb9de8a 100644 --- a/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll +++ b/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=x86_64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=CHECK_UNSAFE ; RUN: llc < %s -mtriple=x86_64 -enable-no-nans-fp-math -mattr=+avx512f | FileCheck %s ; RUN: llc < %s -mtriple=x86_64 -enable-no-signed-zeros-fp-math -mattr=+avx512f | FileCheck %s -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512f | FileCheck %s +; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512f | FileCheck %s ; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512f | FileCheck %s define <16 x float> @test_max_v16f32(ptr %a_ptr, <16 x float> %b) { diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll index 5d9784a..1147d79 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce, <32 x half> %rhs.coerce) { ; CHECK-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll index b58bae9..1c4d9c6 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll index 92bdebb..a8ff969 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s define dso_local <32 x half> @test1(<32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test1:
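Each RUN-line change in this stretch only removes -enable-unsafe-fp-math, leaving any remaining flags in place, so these tests no longer depend on a global unsafe-math mode. The per-instruction fast-math flags express the same relaxations in the IR itself; a minimal sketch of that alternative (illustrative, not taken from any of the tests above):

define float @select_fmax_nnan_nsz(float %a, float %b) {
  ; nnan/nsz grant no-NaNs and no-signed-zeros assumptions on just this
  ; compare+select pair, rather than module-wide via an llc flag, which
  ; is enough for the backend to fold it to a max-style operation.
  %cmp = fcmp nnan nsz ogt float %a, %b
  %r = select nnan nsz i1 %cmp, float %a, float %b
  ret float %r
}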
diff --git a/llvm/test/CodeGen/X86/bf16-fast-isel.ll b/llvm/test/CodeGen/X86/bf16-fast-isel.ll new file mode 100644 index 0000000..c659e0e --- /dev/null +++ b/llvm/test/CodeGen/X86/bf16-fast-isel.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --fast-isel < %s -mtriple=x86_64-unknown-unknown | FileCheck %s + +define i8 @test_direct_call(ptr %f) nounwind { +; CHECK-LABEL: test_direct_call: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq foo@PLT +; CHECK-NEXT: callq bar@PLT +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq +entry: + %call = call bfloat @foo(ptr %f) + %call2 = call zeroext i8 @bar(bfloat %call) + ret i8 %call2 +} + +define i8 @test_fast_direct_call(ptr %f) nounwind { +; CHECK-LABEL: test_fast_direct_call: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq foo_fast@PLT +; CHECK-NEXT: callq bar@PLT +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq +entry: + %call = call fastcc bfloat @foo_fast(ptr %f) + %call2 = call zeroext i8 @bar(bfloat %call) + ret i8 %call2 +} + +define i8 @test_indirect_all(ptr %fptr, ptr %f) nounwind { +; CHECK-LABEL: test_indirect_all: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movq %rsi, %rdi +; CHECK-NEXT: callq foo@PLT +; CHECK-NEXT: callq *%rbx +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq +entry: + %call = call bfloat @foo(ptr %f) + %call2 = call zeroext i8 %fptr(bfloat %call) + ret i8 %call2 +} + +define i8 @test_fast_indirect_all(ptr %fptr, ptr %f) nounwind { +; CHECK-LABEL: test_fast_indirect_all: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movq %rsi, %rdi +; CHECK-NEXT: callq foo_fast@PLT +; CHECK-NEXT: callq *%rbx +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq +entry: + %call = call fastcc bfloat @foo_fast(ptr %f) + %call2 = call zeroext i8 %fptr(bfloat %call) + ret i8 %call2 +} + +declare bfloat @foo(ptr %f) +declare zeroext i8 @bar(bfloat) +declare fastcc bfloat @foo_fast(ptr %f) diff --git a/llvm/test/CodeGen/X86/bitcnt-big-integer.ll b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll new file mode 100644 index 0000000..13149d7 --- /dev/null +++ b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll @@ -0,0 +1,3021 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 -mattr=+avx512vpopcntdq | FileCheck %s --check-prefixes=CHECK,AVX512 + +; +; CTPOP +; + +define i32 @test_ctpop_i128(i128 %a0) nounwind { +; CHECK-LABEL: test_ctpop_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: popcntq %rsi, %rcx +; CHECK-NEXT: popcntq %rdi, %rax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %cnt = call i128 @llvm.ctpop.i128(i128 %a0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctpop_i128(ptr %p0) nounwind { +; CHECK-LABEL: load_ctpop_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: popcntq 8(%rdi), %rcx +; CHECK-NEXT: popcntq (%rdi), %rax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %a0 = load i128, ptr %p0 + %cnt = call i128 @llvm.ctpop.i128(i128 %a0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctpop_i256(i256 %a0) nounwind { +; CHECK-LABEL: test_ctpop_i256: +; CHECK: # %bb.0: +; CHECK-NEXT: popcntq %rcx, %rax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: popcntq %rdx, %rcx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: popcntq %rsi, %rdx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popcntq %rdi, %rax +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %cnt = call i256 @llvm.ctpop.i256(i256 %a0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctpop_i256(ptr %p0) nounwind { +; SSE-LABEL: load_ctpop_i256: +; SSE: # %bb.0: +; SSE-NEXT: popcntq 24(%rdi), %rcx +; SSE-NEXT: popcntq 16(%rdi), %rdx +; SSE-NEXT: popcntq 8(%rdi), %rsi +; SSE-NEXT: popcntq (%rdi), %rax +; SSE-NEXT: addl %ecx, %edx +; SSE-NEXT: addl %esi, %eax +; SSE-NEXT: addl %edx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctpop_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: popcntq 24(%rdi), %rax +; AVX2-NEXT: popcntq 16(%rdi), %rcx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: popcntq 8(%rdi), %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq (%rdi), %rax +; AVX2-NEXT: addl %edx, %eax +; AVX2-NEXT: addl %ecx,
%eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctpop_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: popcntq 24(%rdi), %rax +; AVX512-NEXT: popcntq 16(%rdi), %rcx +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: popcntq 8(%rdi), %rdx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq (%rdi), %rax +; AVX512-NEXT: addl %edx, %eax +; AVX512-NEXT: addl %ecx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i256, ptr %p0 + %cnt = call i256 @llvm.ctpop.i256(i256 %a0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctpop_i512(i512 %a0) nounwind { +; CHECK-LABEL: test_ctpop_i512: +; CHECK: # %bb.0: +; CHECK-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; CHECK-NEXT: addl %eax, %r10d +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popcntq %r9, %rax +; CHECK-NEXT: popcntq %r8, %r8 +; CHECK-NEXT: addl %eax, %r8d +; CHECK-NEXT: addl %r10d, %r8d +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popcntq %rcx, %rax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: popcntq %rdx, %rcx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: popcntq %rsi, %rdx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popcntq %rdi, %rax +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %r8d, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %cnt = call i512 @llvm.ctpop.i512(i512 %a0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctpop_i512(ptr %p0) nounwind { +; SSE-LABEL: load_ctpop_i512: +; SSE: # %bb.0: +; SSE-NEXT: popcntq 56(%rdi), %rax +; SSE-NEXT: popcntq 48(%rdi), %rcx +; SSE-NEXT: popcntq 40(%rdi), %rdx +; SSE-NEXT: popcntq 32(%rdi), %rsi +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: addl %edx, %esi +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 24(%rdi), %rax +; SSE-NEXT: addl %ecx, %esi +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: popcntq 16(%rdi), %rcx +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq 8(%rdi), %rdx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq (%rdi), %rax +; SSE-NEXT: addl %edx, %eax +; SSE-NEXT: addl %ecx, %eax +; SSE-NEXT: addl %esi, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctpop_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: popcntq 56(%rdi), %rax +; AVX2-NEXT: popcntq 48(%rdi), %rcx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 40(%rdi), %rax +; AVX2-NEXT: popcntq 32(%rdi), %rdx +; AVX2-NEXT: addl %eax, %edx +; AVX2-NEXT: addl %ecx, %edx +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: popcntq 24(%rdi), %rcx +; AVX2-NEXT: popcntq 16(%rdi), %rsi +; AVX2-NEXT: popcntq 8(%rdi), %r8 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq (%rdi), %rax +; AVX2-NEXT: addl %ecx, %esi +; AVX2-NEXT: addl %r8d, %eax +; AVX2-NEXT: addl %esi, %eax +; AVX2-NEXT: addl %edx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctpop_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: popcntq 56(%rdi), %rax +; AVX512-NEXT: popcntq 48(%rdi), %rcx +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 40(%rdi), %rax +; AVX512-NEXT: popcntq 32(%rdi), %rdx +; AVX512-NEXT: addl %eax, %edx +; AVX512-NEXT: addl %ecx, %edx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 24(%rdi), %rax +; AVX512-NEXT: xorl %ecx, %ecx +; AVX512-NEXT: popcntq 16(%rdi), %rcx +; AVX512-NEXT: 
popcntq 8(%rdi), %rsi +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq (%rdi), %rax +; AVX512-NEXT: addl %esi, %eax +; AVX512-NEXT: addl %ecx, %eax +; AVX512-NEXT: addl %edx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i512, ptr %p0 + %cnt = call i512 @llvm.ctpop.i512(i512 %a0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctpop_i1024(i1024 %a0) nounwind { +; SSE-LABEL: test_ctpop_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbx +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: addl %eax, %r10d +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: addl %r11d, %eax +; SSE-NEXT: xorl %r11d, %r11d +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: xorl %ebx, %ebx +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: addl %r10d, %eax +; SSE-NEXT: addl %r11d, %ebx +; SSE-NEXT: xorl %r11d, %r11d +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: xorl %r10d, %r10d +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: addl %r11d, %r10d +; SSE-NEXT: addl %ebx, %r10d +; SSE-NEXT: xorl %r11d, %r11d +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: xorl %ebx, %ebx +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: addl %eax, %r10d +; SSE-NEXT: addl %r11d, %ebx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq %r9, %rax +; SSE-NEXT: popcntq %r8, %r8 +; SSE-NEXT: addl %eax, %r8d +; SSE-NEXT: addl %ebx, %r8d +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq %rcx, %rax +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: popcntq %rdx, %rcx +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq %rsi, %rdx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq %rdi, %rax +; SSE-NEXT: addl %edx, %eax +; SSE-NEXT: addl %ecx, %eax +; SSE-NEXT: addl %r8d, %eax +; SSE-NEXT: addl %r10d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctpop_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: addl %eax, %r10d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: addl %eax, %r11d +; AVX2-NEXT: addl %r10d, %r11d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r14 +; AVX2-NEXT: addl %eax, %ebx +; AVX2-NEXT: xorl %r10d, %r10d +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: addl %r14d, %r10d +; AVX2-NEXT: addl %ebx, %r10d +; AVX2-NEXT: addl %r11d, %r10d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: xorl %r11d, %r11d +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: addl %eax, %r11d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq %r9, %rax +; AVX2-NEXT: popcntq %r8, %r8 +; AVX2-NEXT: addl %eax, %r8d +; AVX2-NEXT: addl %r11d, %r8d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq %rcx, %rax +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: popcntq %rdx, %rcx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: popcntq %rsi, %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq %rdi, %rax +; AVX2-NEXT: addl %edx, 
%eax +; AVX2-NEXT: addl %ecx, %eax +; AVX2-NEXT: addl %r8d, %eax +; AVX2-NEXT: addl %r10d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctpop_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: addl %eax, %r10d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: addl %eax, %r11d +; AVX512-NEXT: addl %r10d, %r11d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: xorl %ebx, %ebx +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: xorl %r14d, %r14d +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r14 +; AVX512-NEXT: addl %eax, %ebx +; AVX512-NEXT: xorl %r10d, %r10d +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: addl %r14d, %r10d +; AVX512-NEXT: addl %ebx, %r10d +; AVX512-NEXT: addl %r11d, %r10d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: xorl %r11d, %r11d +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: addl %eax, %r11d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq %r9, %rax +; AVX512-NEXT: popcntq %r8, %r8 +; AVX512-NEXT: addl %eax, %r8d +; AVX512-NEXT: addl %r11d, %r8d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq %rcx, %rax +; AVX512-NEXT: xorl %ecx, %ecx +; AVX512-NEXT: popcntq %rdx, %rcx +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: popcntq %rsi, %rdx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq %rdi, %rax +; AVX512-NEXT: addl %edx, %eax +; AVX512-NEXT: addl %ecx, %eax +; AVX512-NEXT: addl %r8d, %eax +; AVX512-NEXT: addl %r10d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: retq + %cnt = call i1024 @llvm.ctpop.i1024(i1024 %a0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctpop_i1024(ptr %p0) nounwind { +; SSE-LABEL: load_ctpop_i1024: +; SSE: # %bb.0: +; SSE-NEXT: popcntq 120(%rdi), %rax +; SSE-NEXT: popcntq 112(%rdi), %rcx +; SSE-NEXT: popcntq 104(%rdi), %rdx +; SSE-NEXT: popcntq 96(%rdi), %rsi +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: addl %edx, %esi +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 88(%rdi), %rax +; SSE-NEXT: addl %ecx, %esi +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq 80(%rdi), %rdx +; SSE-NEXT: addl %eax, %edx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 72(%rdi), %rax +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: popcntq 64(%rdi), %rcx +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: addl %edx, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 56(%rdi), %rax +; SSE-NEXT: addl %esi, %ecx +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq 48(%rdi), %rdx +; SSE-NEXT: addl %eax, %edx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 40(%rdi), %rax +; SSE-NEXT: xorl %esi, %esi +; SSE-NEXT: popcntq 32(%rdi), %rsi +; SSE-NEXT: addl %eax, %esi +; SSE-NEXT: addl %edx, %esi +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 24(%rdi), %rax +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq 16(%rdi), %rdx +; SSE-NEXT: popcntq 8(%rdi), %r8 +; SSE-NEXT: addl %eax, %edx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq (%rdi), %rax +; SSE-NEXT: addl %r8d, %eax +; SSE-NEXT: addl %edx, %eax +; SSE-NEXT: addl %esi, %eax +; SSE-NEXT: addl %ecx, %eax +; 
SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctpop_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: popcntq 120(%rdi), %rax +; AVX2-NEXT: popcntq 112(%rdi), %rcx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 104(%rdi), %rax +; AVX2-NEXT: popcntq 96(%rdi), %rdx +; AVX2-NEXT: addl %eax, %edx +; AVX2-NEXT: addl %ecx, %edx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 88(%rdi), %rax +; AVX2-NEXT: popcntq 80(%rdi), %rsi +; AVX2-NEXT: popcntq 72(%rdi), %r8 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: popcntq 64(%rdi), %rcx +; AVX2-NEXT: addl %eax, %esi +; AVX2-NEXT: addl %r8d, %ecx +; AVX2-NEXT: addl %esi, %ecx +; AVX2-NEXT: addl %edx, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 56(%rdi), %rax +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: popcntq 48(%rdi), %rdx +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: popcntq 40(%rdi), %rsi +; AVX2-NEXT: xorl %r8d, %r8d +; AVX2-NEXT: popcntq 32(%rdi), %r8 +; AVX2-NEXT: addl %eax, %edx +; AVX2-NEXT: addl %esi, %r8d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 24(%rdi), %rax +; AVX2-NEXT: addl %edx, %r8d +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: popcntq 16(%rdi), %rdx +; AVX2-NEXT: addl %eax, %edx +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: popcntq 8(%rdi), %rsi +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq (%rdi), %rax +; AVX2-NEXT: addl %esi, %eax +; AVX2-NEXT: addl %edx, %eax +; AVX2-NEXT: addl %r8d, %eax +; AVX2-NEXT: addl %ecx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctpop_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: popcntq 120(%rdi), %rax +; AVX512-NEXT: popcntq 112(%rdi), %rcx +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 104(%rdi), %rax +; AVX512-NEXT: popcntq 96(%rdi), %rdx +; AVX512-NEXT: addl %eax, %edx +; AVX512-NEXT: addl %ecx, %edx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 88(%rdi), %rax +; AVX512-NEXT: popcntq 80(%rdi), %rsi +; AVX512-NEXT: popcntq 72(%rdi), %r8 +; AVX512-NEXT: addl %eax, %esi +; AVX512-NEXT: xorl %ecx, %ecx +; AVX512-NEXT: popcntq 64(%rdi), %rcx +; AVX512-NEXT: addl %r8d, %ecx +; AVX512-NEXT: addl %esi, %ecx +; AVX512-NEXT: addl %edx, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 56(%rdi), %rax +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: popcntq 48(%rdi), %rdx +; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: popcntq 40(%rdi), %rsi +; AVX512-NEXT: addl %eax, %edx +; AVX512-NEXT: xorl %r8d, %r8d +; AVX512-NEXT: popcntq 32(%rdi), %r8 +; AVX512-NEXT: addl %esi, %r8d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 24(%rdi), %rax +; AVX512-NEXT: addl %edx, %r8d +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: popcntq 16(%rdi), %rdx +; AVX512-NEXT: addl %eax, %edx +; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: popcntq 8(%rdi), %rsi +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq (%rdi), %rax +; AVX512-NEXT: addl %esi, %eax +; AVX512-NEXT: addl %edx, %eax +; AVX512-NEXT: addl %r8d, %eax +; AVX512-NEXT: addl %ecx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i1024, ptr %p0 + %cnt = call i1024 @llvm.ctpop.i1024(i1024 %a0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +; +; CTLZ +; + +define i32 @test_ctlz_i128(i128 %a0) nounwind { +; SSE-LABEL: test_ctlz_i128: +; SSE: # %bb.0: +; SSE-NEXT: bsrq %rsi, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq %rdi, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: 
addl $64, %eax +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctlz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: lzcntq %rsi, %rcx +; AVX2-NEXT: lzcntq %rdi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctlz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: lzcntq %rsi, %rcx +; AVX512-NEXT: lzcntq %rdi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %cnt = call i128 @llvm.ctlz.i128(i128 %a0, i1 0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctlz_i128(ptr %p0) nounwind { +; SSE-LABEL: load_ctlz_i128: +; SSE: # %bb.0: +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: bsrq %rcx, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq (%rdi), %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctlz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: movq 8(%rdi), %rcx +; AVX2-NEXT: lzcntq %rcx, %rdx +; AVX2-NEXT: lzcntq (%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctlz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: movq 8(%rdi), %rcx +; AVX512-NEXT: lzcntq %rcx, %rdx +; AVX512-NEXT: lzcntq (%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i128, ptr %p0 + %cnt = call i128 @llvm.ctlz.i128(i128 %a0, i1 0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctlz_i256(i256 %a0) nounwind { +; SSE-LABEL: test_ctlz_i256: +; SSE: # %bb.0: +; SSE-NEXT: bsrq %rcx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rdx, %r8 +; SSE-NEXT: xorl $63, %r8d +; SSE-NEXT: orl $64, %r8d +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %eax, %r8d +; SSE-NEXT: bsrq %rsi, %r9 +; SSE-NEXT: xorl $63, %r9d +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq %rdi, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %r9d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: cmovnel %r8d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctlz_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: lzcntq %rcx, %rax +; AVX2-NEXT: lzcntq %rdx, %r8 +; AVX2-NEXT: addl $64, %r8d +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %eax, %r8d +; AVX2-NEXT: lzcntq %rsi, %r9 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %r9d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: cmovnel %r8d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctlz_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: lzcntq %rcx, %rax +; AVX512-NEXT: lzcntq %rdx, %r8 +; AVX512-NEXT: addl $64, %r8d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %eax, %r8d +; AVX512-NEXT: lzcntq 
%rsi, %r9 +; AVX512-NEXT: lzcntq %rdi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %r9d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rcx, %rdx +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %cnt = call i256 @llvm.ctlz.i256(i256 %a0, i1 0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctlz_i256(ptr %p0) nounwind { +; SSE-LABEL: load_ctlz_i256: +; SSE: # %bb.0: +; SSE-NEXT: movq 16(%rdi), %rcx +; SSE-NEXT: movq 24(%rdi), %rdx +; SSE-NEXT: bsrq %rdx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rcx, %rsi +; SSE-NEXT: xorl $63, %esi +; SSE-NEXT: orl $64, %esi +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %esi +; SSE-NEXT: movq 8(%rdi), %r8 +; SSE-NEXT: bsrq %r8, %r9 +; SSE-NEXT: xorl $63, %r9d +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq (%rdi), %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %r9d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rdx, %rcx +; SSE-NEXT: cmovnel %esi, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctlz_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: movq 16(%rdi), %rcx +; AVX2-NEXT: movq 24(%rdi), %rdx +; AVX2-NEXT: lzcntq %rdx, %rax +; AVX2-NEXT: lzcntq %rcx, %rsi +; AVX2-NEXT: addl $64, %esi +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %esi +; AVX2-NEXT: movq 8(%rdi), %r8 +; AVX2-NEXT: lzcntq %r8, %r9 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq (%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %r9d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: cmovnel %esi, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctlz_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: movq 8(%rdi), %rcx +; AVX512-NEXT: movq 16(%rdi), %rdx +; AVX512-NEXT: movq 24(%rdi), %rsi +; AVX512-NEXT: lzcntq %rsi, %rax +; AVX512-NEXT: lzcntq %rdx, %r8 +; AVX512-NEXT: addl $64, %r8d +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %eax, %r8d +; AVX512-NEXT: lzcntq %rcx, %r9 +; AVX512-NEXT: lzcntq (%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %r9d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rsi, %rdx +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i256, ptr %p0 + %cnt = call i256 @llvm.ctlz.i256(i256 %a0, i1 0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctlz_i512(i512 %a0) nounwind { +; SSE-LABEL: test_ctlz_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: bsrq %r11, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r10, %r14 +; SSE-NEXT: xorl $63, %r14d +; SSE-NEXT: orl $64, %r14d +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: bsrq %r9, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r8, %rbx +; SSE-NEXT: xorl $63, %ebx +; SSE-NEXT: orl $64, %ebx +; SSE-NEXT: testq %r9, %r9 +; SSE-NEXT: cmovnel %eax, %ebx +; SSE-NEXT: subl $-128, %ebx +; SSE-NEXT: movq %r10, %rax +; SSE-NEXT: orq %r11, %rax +; SSE-NEXT: cmovnel %r14d, %ebx +; SSE-NEXT: bsrq %rcx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rdx, %r14 +; SSE-NEXT: xorl 
$63, %r14d +; SSE-NEXT: orl $64, %r14d +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: bsrq %rsi, %r15 +; SSE-NEXT: xorl $63, %r15d +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq %rdi, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: cmovnel %r14d, %eax +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r11, %r9 +; SSE-NEXT: orq %r10, %r8 +; SSE-NEXT: orq %r9, %r8 +; SSE-NEXT: cmovnel %ebx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctlz_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: lzcntq %r11, %rax +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: lzcntq %r10, %r14 +; AVX2-NEXT: addl $64, %r14d +; AVX2-NEXT: testq %r11, %r11 +; AVX2-NEXT: cmovnel %eax, %r14d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %r9, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: lzcntq %r8, %rbx +; AVX2-NEXT: addl $64, %ebx +; AVX2-NEXT: testq %r9, %r9 +; AVX2-NEXT: cmovnel %eax, %ebx +; AVX2-NEXT: subl $-128, %ebx +; AVX2-NEXT: movq %r10, %rax +; AVX2-NEXT: orq %r11, %rax +; AVX2-NEXT: cmovnel %r14d, %ebx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rcx, %rax +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: lzcntq %rdx, %r14 +; AVX2-NEXT: addl $64, %r14d +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %eax, %r14d +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: lzcntq %rsi, %r15 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: cmovnel %r14d, %eax +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r11, %r9 +; AVX2-NEXT: orq %r10, %r8 +; AVX2-NEXT: orq %r9, %r8 +; AVX2-NEXT: cmovnel %ebx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctlz_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: lzcntq %r11, %rax +; AVX512-NEXT: lzcntq %r10, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: lzcntq %r9, %rax +; AVX512-NEXT: lzcntq %r8, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %r9, %r9 +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: subl $-128, %ebx +; AVX512-NEXT: movq %r10, %rax +; AVX512-NEXT: orq %r11, %rax +; AVX512-NEXT: cmovnel %r14d, %ebx +; AVX512-NEXT: lzcntq %rcx, %rax +; AVX512-NEXT: lzcntq %rdx, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: lzcntq %rsi, %r15 +; AVX512-NEXT: lzcntq %rdi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %r15d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rcx, %rdx +; AVX512-NEXT: cmovnel %r14d, %eax +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r11, %r9 +; AVX512-NEXT: orq %r10, %r8 +; AVX512-NEXT: 
orq %r9, %r8 +; AVX512-NEXT: cmovnel %ebx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: retq + %cnt = call i512 @llvm.ctlz.i512(i512 %a0, i1 0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctlz_i512(ptr %p0) nounwind { +; SSE-LABEL: load_ctlz_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq 8(%rdi), %r10 +; SSE-NEXT: movq 16(%rdi), %r9 +; SSE-NEXT: movq 32(%rdi), %rcx +; SSE-NEXT: movq 40(%rdi), %rdx +; SSE-NEXT: movq 48(%rdi), %rsi +; SSE-NEXT: movq 56(%rdi), %r8 +; SSE-NEXT: bsrq %r8, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rsi, %r14 +; SSE-NEXT: xorl $63, %r14d +; SSE-NEXT: orl $64, %r14d +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: bsrq %rdx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rcx, %r11 +; SSE-NEXT: xorl $63, %r11d +; SSE-NEXT: orl $64, %r11d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %r11d +; SSE-NEXT: movq 24(%rdi), %rbx +; SSE-NEXT: subl $-128, %r11d +; SSE-NEXT: movq %rsi, %rax +; SSE-NEXT: orq %r8, %rax +; SSE-NEXT: cmovnel %r14d, %r11d +; SSE-NEXT: bsrq %rbx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r9, %r14 +; SSE-NEXT: xorl $63, %r14d +; SSE-NEXT: orl $64, %r14d +; SSE-NEXT: testq %rbx, %rbx +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: bsrq %r10, %r15 +; SSE-NEXT: xorl $63, %r15d +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq (%rdi), %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rbx, %r9 +; SSE-NEXT: cmovnel %r14d, %eax +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r8, %rdx +; SSE-NEXT: orq %rsi, %rcx +; SSE-NEXT: orq %rdx, %rcx +; SSE-NEXT: cmovnel %r11d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctlz_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq 8(%rdi), %r10 +; AVX2-NEXT: movq 16(%rdi), %r9 +; AVX2-NEXT: movq 32(%rdi), %rcx +; AVX2-NEXT: movq 40(%rdi), %rdx +; AVX2-NEXT: movq 48(%rdi), %rsi +; AVX2-NEXT: movq 56(%rdi), %r8 +; AVX2-NEXT: lzcntq %r8, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: lzcntq %rsi, %rbx +; AVX2-NEXT: addl $64, %ebx +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %eax, %ebx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdx, %rax +; AVX2-NEXT: lzcntq %rcx, %r11 +; AVX2-NEXT: addl $64, %r11d +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %r11d +; AVX2-NEXT: subl $-128, %r11d +; AVX2-NEXT: movq %rsi, %rax +; AVX2-NEXT: orq %r8, %rax +; AVX2-NEXT: cmovnel %ebx, %r11d +; AVX2-NEXT: movq 24(%rdi), %rbx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rbx, %rax +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: lzcntq %r9, %r14 +; AVX2-NEXT: addl $64, %r14d +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %eax, %r14d +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: lzcntq %r10, %r15 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq (%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rbx, %r9 +; AVX2-NEXT: cmovnel %r14d, %eax +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r8, %rdx +; AVX2-NEXT: orq %rsi, 
%rcx +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: cmovnel %r11d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctlz_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq 8(%rdi), %r11 +; AVX512-NEXT: movq 16(%rdi), %r9 +; AVX512-NEXT: movq 24(%rdi), %r10 +; AVX512-NEXT: movq 32(%rdi), %rcx +; AVX512-NEXT: movq 40(%rdi), %rdx +; AVX512-NEXT: movq 48(%rdi), %rsi +; AVX512-NEXT: movq 56(%rdi), %r8 +; AVX512-NEXT: lzcntq %r8, %rax +; AVX512-NEXT: lzcntq %rsi, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: lzcntq %rdx, %rax +; AVX512-NEXT: lzcntq %rcx, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: subl $-128, %ebx +; AVX512-NEXT: movq %rsi, %rax +; AVX512-NEXT: orq %r8, %rax +; AVX512-NEXT: cmovnel %r14d, %ebx +; AVX512-NEXT: lzcntq %r10, %rax +; AVX512-NEXT: lzcntq %r9, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: lzcntq (%rdi), %rax +; AVX512-NEXT: lzcntq %r11, %rdi +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: cmovnel %edi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r10, %r9 +; AVX512-NEXT: cmovnel %r14d, %eax +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r8, %rdx +; AVX512-NEXT: orq %rsi, %rcx +; AVX512-NEXT: orq %rdx, %rcx +; AVX512-NEXT: cmovnel %ebx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: retq + %a0 = load i512, ptr %p0 + %cnt = call i512 @llvm.ctlz.i512(i512 %a0, i1 0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctlz_i1024(i1024 %a0) nounwind { +; SSE-LABEL: test_ctlz_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq %r9, %r11 +; SSE-NEXT: movq %r8, %r9 +; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq %rdx, %r12 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: bsrq %r8, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r15, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: bsrq %r14, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: bsrq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: orl $64, %eax +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: movq %r15, %rdx +; SSE-NEXT: orq %r8, %rdx +; SSE-NEXT: movq %r8, %r14 +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: bsrq %r13, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: bsrq %rbx, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: orl $64, %edx +; SSE-NEXT: testq %r13, %r13 +; SSE-NEXT: cmovnel %ecx, %edx +; SSE-NEXT: bsrq %r10, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: bsrq %r8, %rbp +; SSE-NEXT: xorl $63, %ebp +; SSE-NEXT: orl $64, %ebp +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %ecx, %ebp +; 
SSE-NEXT: subl $-128, %ebp +; SSE-NEXT: movq %rbx, %rcx +; SSE-NEXT: orq %r13, %rcx +; SSE-NEXT: cmovnel %edx, %ebp +; SSE-NEXT: addl $256, %ebp # imm = 0x100 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; SSE-NEXT: orq %r14, %rcx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; SSE-NEXT: orq %r15, %rdx +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: cmovnel %eax, %ebp +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; SSE-NEXT: bsrq %r14, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; SSE-NEXT: bsrq %r15, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: bsrq %r11, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r9, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: orl $64, %edx +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovnel %eax, %edx +; SSE-NEXT: subl $-128, %edx +; SSE-NEXT: movq %r15, %rax +; SSE-NEXT: orq %r14, %rax +; SSE-NEXT: cmovnel %ecx, %edx +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: bsrq %r15, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r12, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r15, %r15 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq %rdi, %rax +; SSE-NEXT: bsrq %rsi, %rdi +; SSE-NEXT: xorl $63, %edi +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %edi, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r15, %r12 +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: orq %r14, %r11 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r9 +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r11, %r9 +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: orq %r13, %r10 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: orq %rbx, %r8 +; SSE-NEXT: addl $512, %eax # imm = 0x200 +; SSE-NEXT: orq %r10, %r8 +; SSE-NEXT: cmovnel %ebp, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctlz_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq %r9, %r14 +; AVX2-NEXT: movq %r8, %r11 +; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r12, %rcx +; AVX2-NEXT: xorl %r9d, %r9d +; AVX2-NEXT: lzcntq %r8, %r9 +; AVX2-NEXT: addl $64, %r9d +; AVX2-NEXT: testq %r12, %r12 +; AVX2-NEXT: cmovnel %ecx, %r9d +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: lzcntq %r10, %rsi +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %rax, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %esi, %ecx +; AVX2-NEXT: subl $-128, %ecx +; AVX2-NEXT: movq %r8, %rsi +; AVX2-NEXT: orq %r12, %rsi +; AVX2-NEXT: cmovnel %r9d, %ecx +; AVX2-NEXT: xorl %edi, %edi +; AVX2-NEXT: 
lzcntq %rbx, %rdi +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: lzcntq %r15, %rsi +; AVX2-NEXT: addl $64, %esi +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %edi, %esi +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX2-NEXT: xorl %ebp, %ebp +; AVX2-NEXT: lzcntq %r13, %rbp +; AVX2-NEXT: addl $64, %ebp +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r9 +; AVX2-NEXT: xorl %edi, %edi +; AVX2-NEXT: lzcntq %r9, %rdi +; AVX2-NEXT: testq %r9, %r9 +; AVX2-NEXT: cmovnel %edi, %ebp +; AVX2-NEXT: subl $-128, %ebp +; AVX2-NEXT: movq %r15, %rdi +; AVX2-NEXT: orq %rbx, %rdi +; AVX2-NEXT: cmovnel %esi, %ebp +; AVX2-NEXT: addl $256, %ebp # imm = 0x100 +; AVX2-NEXT: movq %r10, %rdi +; AVX2-NEXT: orq %r12, %rdi +; AVX2-NEXT: movq %rax, %rsi +; AVX2-NEXT: orq %r8, %rsi +; AVX2-NEXT: orq %rdi, %rsi +; AVX2-NEXT: cmovnel %ecx, %ebp +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r12, %rcx +; AVX2-NEXT: testq %r12, %r12 +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r11, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: lzcntq %r14, %rsi +; AVX2-NEXT: testq %r14, %r14 +; AVX2-NEXT: cmovnel %esi, %ecx +; AVX2-NEXT: subl $-128, %ecx +; AVX2-NEXT: movq %rdi, %rsi +; AVX2-NEXT: orq %r12, %rsi +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: movq %rdx, %rdi +; AVX2-NEXT: lzcntq %rdx, %rdx +; AVX2-NEXT: addl $64, %edx +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %r10, %rax +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %eax, %edx +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: lzcntq %rax, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; AVX2-NEXT: lzcntq %rsi, %r8 +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %r8d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %r10, %rdi +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: orq %r12, %r14 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r14, %r11 +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r9 +; AVX2-NEXT: orq %rbx, %r9 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r15 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; AVX2-NEXT: orq %r15, %r13 +; AVX2-NEXT: addl $512, %eax # imm = 0x200 +; AVX2-NEXT: orq %r9, %r13 +; AVX2-NEXT: cmovnel %ebp, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctlz_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq %r9, %r14 +; AVX512-NEXT: movq %r8, %r11 +; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; 
AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX512-NEXT: lzcntq %r12, %rcx +; AVX512-NEXT: lzcntq %r8, %r9 +; AVX512-NEXT: addl $64, %r9d +; AVX512-NEXT: testq %r12, %r12 +; AVX512-NEXT: cmovnel %ecx, %r9d +; AVX512-NEXT: lzcntq %r10, %rsi +; AVX512-NEXT: lzcntq %rax, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %esi, %ecx +; AVX512-NEXT: subl $-128, %ecx +; AVX512-NEXT: movq %r8, %rsi +; AVX512-NEXT: orq %r12, %rsi +; AVX512-NEXT: cmovnel %r9d, %ecx +; AVX512-NEXT: lzcntq %rbx, %rdi +; AVX512-NEXT: lzcntq %r15, %rsi +; AVX512-NEXT: addl $64, %esi +; AVX512-NEXT: testq %rbx, %rbx +; AVX512-NEXT: cmovnel %edi, %esi +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX512-NEXT: lzcntq %r13, %rbp +; AVX512-NEXT: addl $64, %ebp +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r9 +; AVX512-NEXT: lzcntq %r9, %rdi +; AVX512-NEXT: testq %r9, %r9 +; AVX512-NEXT: cmovnel %edi, %ebp +; AVX512-NEXT: subl $-128, %ebp +; AVX512-NEXT: movq %r15, %rdi +; AVX512-NEXT: orq %rbx, %rdi +; AVX512-NEXT: cmovnel %esi, %ebp +; AVX512-NEXT: addl $256, %ebp # imm = 0x100 +; AVX512-NEXT: movq %r10, %rdi +; AVX512-NEXT: orq %r12, %rdi +; AVX512-NEXT: movq %rax, %rsi +; AVX512-NEXT: orq %r8, %rsi +; AVX512-NEXT: orq %rdi, %rsi +; AVX512-NEXT: cmovnel %ecx, %ebp +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; AVX512-NEXT: lzcntq %rdi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX512-NEXT: lzcntq %r12, %rcx +; AVX512-NEXT: testq %r12, %r12 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: lzcntq %r11, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: lzcntq %r14, %rsi +; AVX512-NEXT: testq %r14, %r14 +; AVX512-NEXT: cmovnel %esi, %ecx +; AVX512-NEXT: subl $-128, %ecx +; AVX512-NEXT: movq %rdi, %rsi +; AVX512-NEXT: orq %r12, %rsi +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: movq %rdx, %rdi +; AVX512-NEXT: lzcntq %rdx, %rdx +; AVX512-NEXT: addl $64, %edx +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX512-NEXT: lzcntq %r10, %rax +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %eax, %edx +; AVX512-NEXT: lzcntq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; AVX512-NEXT: lzcntq %rsi, %r8 +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r10, %rdi +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: orq %r12, %r14 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r14, %r11 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r9 +; AVX512-NEXT: orq %rbx, %r9 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r15 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; AVX512-NEXT: orq %r15, %r13 +; AVX512-NEXT: addl $512, %eax # imm = 0x200 +; AVX512-NEXT: orq %r9, %r13 +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: retq + %cnt = call i1024 @llvm.ctlz.i1024(i1024 %a0, i1 0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctlz_i1024(ptr %p0) nounwind { +; SSE-LABEL: load_ctlz_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; 
SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq 40(%rdi), %rbp +; SSE-NEXT: movq 64(%rdi), %rbx +; SSE-NEXT: movq 72(%rdi), %r11 +; SSE-NEXT: movq 80(%rdi), %r12 +; SSE-NEXT: movq 88(%rdi), %r14 +; SSE-NEXT: movq 96(%rdi), %rsi +; SSE-NEXT: movq 104(%rdi), %r9 +; SSE-NEXT: movq 112(%rdi), %r10 +; SSE-NEXT: movq 120(%rdi), %r8 +; SSE-NEXT: bsrq %r8, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r10, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: bsrq %r9, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: bsrq %rsi, %rax +; SSE-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: orl $64, %eax +; SSE-NEXT: testq %r9, %r9 +; SSE-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: movq %r10, %rdx +; SSE-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: orq %r8, %rdx +; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: bsrq %r14, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: bsrq %r12, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: orl $64, %edx +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovnel %ecx, %edx +; SSE-NEXT: bsrq %r11, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: bsrq %rbx, %r15 +; SSE-NEXT: xorl $63, %r15d +; SSE-NEXT: orl $64, %r15d +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovnel %ecx, %r15d +; SSE-NEXT: subl $-128, %r15d +; SSE-NEXT: movq %r12, %rcx +; SSE-NEXT: orq %r14, %rcx +; SSE-NEXT: cmovnel %edx, %r15d +; SSE-NEXT: movq 48(%rdi), %r12 +; SSE-NEXT: addl $256, %r15d # imm = 0x100 +; SSE-NEXT: movq %r9, %rcx +; SSE-NEXT: orq %r8, %rcx +; SSE-NEXT: movq %rsi, %rdx +; SSE-NEXT: orq %r10, %rdx +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: movq 56(%rdi), %r13 +; SSE-NEXT: cmovnel %eax, %r15d +; SSE-NEXT: bsrq %r13, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r12, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r13, %r13 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movq %rbp, %r10 +; SSE-NEXT: bsrq %rbp, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: movq 32(%rdi), %r8 +; SSE-NEXT: bsrq %r8, %rbp +; SSE-NEXT: xorl $63, %ebp +; SSE-NEXT: orl $64, %ebp +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %eax, %ebp +; SSE-NEXT: subl $-128, %ebp +; SSE-NEXT: movq %r12, %rax +; SSE-NEXT: orq %r13, %rax +; SSE-NEXT: cmovnel %ecx, %ebp +; SSE-NEXT: movq 24(%rdi), %r9 +; SSE-NEXT: bsrq %r9, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: movq 16(%rdi), %rsi +; SSE-NEXT: bsrq %rsi, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r9, %r9 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rdi +; SSE-NEXT: bsrq %rdi, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r9, %rsi +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: orq %r13, %r10 +; SSE-NEXT: orq %r12, %r8 +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r10, %r8 +; SSE-NEXT: cmovnel %ebp, %eax +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; SSE-NEXT: orq %r14, %r11 +; SSE-NEXT: movq 
{{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; SSE-NEXT: orq %rcx, %rbx +; SSE-NEXT: addl $512, %eax # imm = 0x200 +; SSE-NEXT: orq %r11, %rbx +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctlz_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq 48(%rdi), %r9 +; AVX2-NEXT: movq 56(%rdi), %rbp +; AVX2-NEXT: movq 64(%rdi), %r11 +; AVX2-NEXT: movq 72(%rdi), %r10 +; AVX2-NEXT: movq 80(%rdi), %r14 +; AVX2-NEXT: movq 88(%rdi), %rbx +; AVX2-NEXT: movq 96(%rdi), %rdx +; AVX2-NEXT: movq 104(%rdi), %r8 +; AVX2-NEXT: movq 112(%rdi), %rsi +; AVX2-NEXT: movq 120(%rdi), %r15 +; AVX2-NEXT: lzcntq %r15, %rax +; AVX2-NEXT: lzcntq %rsi, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: testq %r15, %r15 +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: lzcntq %r8, %r12 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdx, %rax +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: movq %rsi, %r12 +; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: orq %r15, %r12 +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %rbx, %rcx +; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: xorl %r13d, %r13d +; AVX2-NEXT: lzcntq %r14, %r13 +; AVX2-NEXT: addl $64, %r13d +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %ecx, %r13d +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r10, %rcx +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: lzcntq %r11, %r12 +; AVX2-NEXT: addl $64, %r12d +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %ecx, %r12d +; AVX2-NEXT: subl $-128, %r12d +; AVX2-NEXT: movq %r14, %rcx +; AVX2-NEXT: orq %rbx, %rcx +; AVX2-NEXT: cmovnel %r13d, %r12d +; AVX2-NEXT: addl $256, %r12d # imm = 0x100 +; AVX2-NEXT: movq %r8, %rcx +; AVX2-NEXT: orq %r15, %rcx +; AVX2-NEXT: orq %rsi, %rdx +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: cmovnel %eax, %r12d +; AVX2-NEXT: movq %rbp, %r14 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %rbp, %rcx +; AVX2-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %r9, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rbp, %rbp +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: movq 32(%rdi), %r13 +; AVX2-NEXT: xorl %ebp, %ebp +; AVX2-NEXT: lzcntq %r13, %rbp +; AVX2-NEXT: addl $64, %ebp +; AVX2-NEXT: movq 40(%rdi), %r8 +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: lzcntq %r8, %rdx +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %edx, %ebp +; AVX2-NEXT: subl $-128, %ebp +; AVX2-NEXT: movq %r9, %rdx +; AVX2-NEXT: orq %r14, %rdx +; AVX2-NEXT: cmovnel %eax, %ebp +; AVX2-NEXT: movq 16(%rdi), %r9 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r9, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: movq 24(%rdi), %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdx, %rax +; AVX2-NEXT: testq %rdx, 
%rdx +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: movq 8(%rdi), %rsi +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq (%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: lzcntq %rsi, %rdi +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %edi, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rdx, %r9 +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: orq %r14, %r8 +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r8, %r13 +; AVX2-NEXT: cmovnel %ebp, %eax +; AVX2-NEXT: orq %r15, %rbx +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload +; AVX2-NEXT: orq %rbx, %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; AVX2-NEXT: orq %rcx, %r11 +; AVX2-NEXT: addl $512, %eax # imm = 0x200 +; AVX2-NEXT: orq %r10, %r11 +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctlz_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq 32(%rdi), %r14 +; AVX512-NEXT: movq 48(%rdi), %rbp +; AVX512-NEXT: movq 64(%rdi), %r11 +; AVX512-NEXT: movq 72(%rdi), %r10 +; AVX512-NEXT: movq 80(%rdi), %rdx +; AVX512-NEXT: movq 88(%rdi), %rbx +; AVX512-NEXT: movq 96(%rdi), %rsi +; AVX512-NEXT: movq 104(%rdi), %r9 +; AVX512-NEXT: movq 112(%rdi), %r8 +; AVX512-NEXT: movq 120(%rdi), %r15 +; AVX512-NEXT: lzcntq %r15, %rax +; AVX512-NEXT: lzcntq %r8, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: testq %r15, %r15 +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: lzcntq %r9, %r12 +; AVX512-NEXT: lzcntq %rsi, %rax +; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r9, %r9 +; AVX512-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: movq %r8, %r12 +; AVX512-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: orq %r15, %r12 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: lzcntq %rbx, %rcx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: lzcntq %rdx, %r13 +; AVX512-NEXT: addl $64, %r13d +; AVX512-NEXT: testq %rbx, %rbx +; AVX512-NEXT: cmovnel %ecx, %r13d +; AVX512-NEXT: lzcntq %r10, %rcx +; AVX512-NEXT: lzcntq %r11, %r12 +; AVX512-NEXT: addl $64, %r12d +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %ecx, %r12d +; AVX512-NEXT: subl $-128, %r12d +; AVX512-NEXT: movq %rdx, %rcx +; AVX512-NEXT: orq %rbx, %rcx +; AVX512-NEXT: cmovnel %r13d, %r12d +; AVX512-NEXT: addl $256, %r12d # imm = 0x100 +; AVX512-NEXT: movq %r9, %rcx +; AVX512-NEXT: orq %r15, %rcx +; AVX512-NEXT: orq %r8, %rsi +; AVX512-NEXT: orq %rcx, %rsi +; AVX512-NEXT: movq 56(%rdi), %r13 +; AVX512-NEXT: cmovnel %eax, %r12d +; AVX512-NEXT: lzcntq %r13, %rcx +; AVX512-NEXT: movq %rbp, %rsi +; AVX512-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: lzcntq %rbp, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r13, %r13 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: 
lzcntq %r14, %rbp +; AVX512-NEXT: addl $64, %ebp +; AVX512-NEXT: movq 40(%rdi), %r8 +; AVX512-NEXT: lzcntq %r8, %rdx +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %edx, %ebp +; AVX512-NEXT: subl $-128, %ebp +; AVX512-NEXT: movq %rsi, %rdx +; AVX512-NEXT: orq %r13, %rdx +; AVX512-NEXT: cmovnel %eax, %ebp +; AVX512-NEXT: movq 16(%rdi), %r9 +; AVX512-NEXT: lzcntq %r9, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: movq 24(%rdi), %rdx +; AVX512-NEXT: lzcntq %rdx, %rax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: movq 8(%rdi), %rsi +; AVX512-NEXT: lzcntq (%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: lzcntq %rsi, %rdi +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %edi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rdx, %r9 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: orq %r13, %r8 +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r8, %r14 +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: orq %r15, %rbx +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload +; AVX512-NEXT: orq %rbx, %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; AVX512-NEXT: orq %rcx, %r11 +; AVX512-NEXT: addl $512, %eax # imm = 0x200 +; AVX512-NEXT: orq %r10, %r11 +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: retq + %a0 = load i1024, ptr %p0 + %cnt = call i1024 @llvm.ctlz.i1024(i1024 %a0, i1 0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +; +; CTTZ +; + +define i32 @test_cttz_i128(i128 %a0) nounwind { +; SSE-LABEL: test_cttz_i128: +; SSE: # %bb.0: +; SSE-NEXT: rep bsfq %rdi, %rcx +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq %rsi, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: test_cttz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: tzcntq %rdi, %rcx +; AVX2-NEXT: tzcntq %rsi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rdi, %rdi +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_cttz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: tzcntq %rdi, %rcx +; AVX512-NEXT: tzcntq %rsi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rdi, %rdi +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %cnt = call i128 @llvm.cttz.i128(i128 %a0, i1 0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @load_cttz_i128(ptr %p0) nounwind { +; SSE-LABEL: load_cttz_i128: +; SSE: # %bb.0: +; SSE-NEXT: movq (%rdi), %rcx +; SSE-NEXT: rep bsfq %rcx, %rdx +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq 8(%rdi), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_cttz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: movq (%rdi), %rcx +; AVX2-NEXT: tzcntq %rcx, %rdx +; AVX2-NEXT: tzcntq 8(%rdi), %rax +; AVX2-NEXT: addl $64, %eax 
+; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_cttz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: movq (%rdi), %rcx +; AVX512-NEXT: tzcntq %rcx, %rdx +; AVX512-NEXT: tzcntq 8(%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i128, ptr %p0 + %cnt = call i128 @llvm.cttz.i128(i128 %a0, i1 0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @test_cttz_i256(i256 %a0) nounwind { +; SSE-LABEL: test_cttz_i256: +; SSE: # %bb.0: +; SSE-NEXT: rep bsfq %rdi, %rax +; SSE-NEXT: rep bsfq %rsi, %r8 +; SSE-NEXT: addl $64, %r8d +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %eax, %r8d +; SSE-NEXT: rep bsfq %rdx, %r9 +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %r9d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rsi, %rdi +; SSE-NEXT: cmovnel %r8d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: test_cttz_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: tzcntq %rdi, %rax +; AVX2-NEXT: tzcntq %rsi, %r8 +; AVX2-NEXT: addl $64, %r8d +; AVX2-NEXT: testq %rdi, %rdi +; AVX2-NEXT: cmovnel %eax, %r8d +; AVX2-NEXT: tzcntq %rdx, %r9 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rcx, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %r9d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rsi, %rdi +; AVX2-NEXT: cmovnel %r8d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_cttz_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: tzcntq %rdi, %rax +; AVX512-NEXT: tzcntq %rsi, %r8 +; AVX512-NEXT: addl $64, %r8d +; AVX512-NEXT: testq %rdi, %rdi +; AVX512-NEXT: cmovnel %eax, %r8d +; AVX512-NEXT: tzcntq %rdx, %r9 +; AVX512-NEXT: tzcntq %rcx, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %r9d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rsi, %rdi +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %cnt = call i256 @llvm.cttz.i256(i256 %a0, i1 0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @load_cttz_i256(ptr %p0) nounwind { +; SSE-LABEL: load_cttz_i256: +; SSE: # %bb.0: +; SSE-NEXT: movq 16(%rdi), %rcx +; SSE-NEXT: movq (%rdi), %rdx +; SSE-NEXT: movq 8(%rdi), %rsi +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: rep bsfq %rsi, %r8 +; SSE-NEXT: addl $64, %r8d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %r8d +; SSE-NEXT: rep bsfq %rcx, %r9 +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq 24(%rdi), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %r9d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rsi, %rdx +; SSE-NEXT: cmovnel %r8d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_cttz_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: movq (%rdi), %rcx +; AVX2-NEXT: movq 8(%rdi), %rdx +; AVX2-NEXT: tzcntq %rcx, %rax +; AVX2-NEXT: tzcntq %rdx, %rsi +; AVX2-NEXT: addl $64, %esi +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %eax, %esi +; AVX2-NEXT: movq 16(%rdi), %r8 +; AVX2-NEXT: tzcntq %r8, %r9 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq 24(%rdi), %rax +; AVX2-NEXT: addl $64, 
%eax +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %r9d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: cmovnel %esi, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_cttz_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: movq 16(%rdi), %rcx +; AVX512-NEXT: movq (%rdi), %rdx +; AVX512-NEXT: movq 8(%rdi), %rsi +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: tzcntq %rsi, %r8 +; AVX512-NEXT: addl $64, %r8d +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %r8d +; AVX512-NEXT: tzcntq %rcx, %r9 +; AVX512-NEXT: tzcntq 24(%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %r9d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rsi, %rdx +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i256, ptr %p0 + %cnt = call i256 @llvm.cttz.i256(i256 %a0, i1 0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @test_cttz_i512(i512 %a0) nounwind { +; SSE-LABEL: test_cttz_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: rep bsfq %rdi, %rax +; SSE-NEXT: rep bsfq %rsi, %r11 +; SSE-NEXT: addl $64, %r11d +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %eax, %r11d +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: rep bsfq %rcx, %r10 +; SSE-NEXT: addl $64, %r10d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %r10d +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: subl $-128, %r10d +; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: orq %rsi, %rax +; SSE-NEXT: cmovnel %r11d, %r10d +; SSE-NEXT: rep bsfq %r8, %rax +; SSE-NEXT: rep bsfq %r9, %r11 +; SSE-NEXT: addl $64, %r11d +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %r11d +; SSE-NEXT: rep bsfq %rbx, %r14 +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rbx, %rbx +; SSE-NEXT: cmovnel %r14d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r9, %r8 +; SSE-NEXT: cmovnel %r11d, %eax +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %rcx, %rsi +; SSE-NEXT: orq %rdx, %rdi +; SSE-NEXT: orq %rsi, %rdi +; SSE-NEXT: cmovnel %r10d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r14 +; SSE-NEXT: retq +; +; AVX2-LABEL: test_cttz_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: tzcntq %rdi, %rax +; AVX2-NEXT: tzcntq %rsi, %r11 +; AVX2-NEXT: addl $64, %r11d +; AVX2-NEXT: testq %rdi, %rdi +; AVX2-NEXT: cmovnel %eax, %r11d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: tzcntq %rcx, %r10 +; AVX2-NEXT: addl $64, %r10d +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %r10d +; AVX2-NEXT: subl $-128, %r10d +; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: orq %rsi, %rax +; AVX2-NEXT: cmovnel %r11d, %r10d +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %r8, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: tzcntq %r9, %rbx +; AVX2-NEXT: addl $64, %ebx +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %eax, %ebx +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: tzcntq %r11, %r14 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r11, %r11 +; AVX2-NEXT: cmovnel %r14d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %r9, %r8 +; AVX2-NEXT: cmovnel %ebx, %eax +; AVX2-NEXT: addl 
$256, %eax # imm = 0x100 +; AVX2-NEXT: orq %rcx, %rsi +; AVX2-NEXT: orq %rdx, %rdi +; AVX2-NEXT: orq %rsi, %rdi +; AVX2-NEXT: cmovnel %r10d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_cttz_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: tzcntq %rdi, %rax +; AVX512-NEXT: tzcntq %rsi, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %rdi, %rdi +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: tzcntq %rcx, %r10 +; AVX512-NEXT: addl $64, %r10d +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %r10d +; AVX512-NEXT: subl $-128, %r10d +; AVX512-NEXT: movq %rdi, %rax +; AVX512-NEXT: orq %rsi, %rax +; AVX512-NEXT: cmovnel %ebx, %r10d +; AVX512-NEXT: tzcntq %r8, %rax +; AVX512-NEXT: tzcntq %r9, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: tzcntq %r11, %r14 +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: cmovnel %r14d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r9, %r8 +; AVX512-NEXT: cmovnel %ebx, %eax +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %rcx, %rsi +; AVX512-NEXT: orq %rdx, %rdi +; AVX512-NEXT: orq %rsi, %rdi +; AVX512-NEXT: cmovnel %r10d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: retq + %cnt = call i512 @llvm.cttz.i512(i512 %a0, i1 0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @load_cttz_i512(ptr %p0) nounwind { +; SSE-LABEL: load_cttz_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq 48(%rdi), %r10 +; SSE-NEXT: movq 40(%rdi), %r9 +; SSE-NEXT: movq 24(%rdi), %r8 +; SSE-NEXT: movq 16(%rdi), %rdx +; SSE-NEXT: movq (%rdi), %rcx +; SSE-NEXT: movq 8(%rdi), %rsi +; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: rep bsfq %rsi, %rbx +; SSE-NEXT: addl $64, %ebx +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %eax, %ebx +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: rep bsfq %r8, %r11 +; SSE-NEXT: addl $64, %r11d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %r11d +; SSE-NEXT: movq 32(%rdi), %r14 +; SSE-NEXT: subl $-128, %r11d +; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: orq %rsi, %rax +; SSE-NEXT: cmovnel %ebx, %r11d +; SSE-NEXT: rep bsfq %r14, %rax +; SSE-NEXT: rep bsfq %r9, %rbx +; SSE-NEXT: addl $64, %ebx +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovnel %eax, %ebx +; SSE-NEXT: rep bsfq %r10, %r15 +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq 56(%rdi), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r9, %r14 +; SSE-NEXT: cmovnel %ebx, %eax +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r8, %rsi +; SSE-NEXT: orq %rdx, %rcx +; SSE-NEXT: orq %rsi, %rcx +; SSE-NEXT: cmovnel %r11d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: retq +; +; AVX2-LABEL: load_cttz_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq 48(%rdi), %r10 +; AVX2-NEXT: movq 40(%rdi), %r9 +; AVX2-NEXT: movq 24(%rdi), %r8 +; AVX2-NEXT: movq 16(%rdi), 
%rdx +; AVX2-NEXT: movq (%rdi), %rcx +; AVX2-NEXT: movq 8(%rdi), %rsi +; AVX2-NEXT: tzcntq %rcx, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: tzcntq %rsi, %rbx +; AVX2-NEXT: addl $64, %ebx +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %eax, %ebx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: tzcntq %r8, %r11 +; AVX2-NEXT: addl $64, %r11d +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %r11d +; AVX2-NEXT: subl $-128, %r11d +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: orq %rsi, %rax +; AVX2-NEXT: cmovnel %ebx, %r11d +; AVX2-NEXT: movq 32(%rdi), %rbx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rbx, %rax +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: tzcntq %r9, %r14 +; AVX2-NEXT: addl $64, %r14d +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %eax, %r14d +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %r10, %r15 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq 56(%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %r9, %rbx +; AVX2-NEXT: cmovnel %r14d, %eax +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r8, %rsi +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: orq %rsi, %rcx +; AVX2-NEXT: cmovnel %r11d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_cttz_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq 48(%rdi), %r11 +; AVX512-NEXT: movq 40(%rdi), %r9 +; AVX512-NEXT: movq 32(%rdi), %r10 +; AVX512-NEXT: movq 24(%rdi), %r8 +; AVX512-NEXT: movq 16(%rdi), %rdx +; AVX512-NEXT: movq (%rdi), %rcx +; AVX512-NEXT: movq 8(%rdi), %rsi +; AVX512-NEXT: tzcntq %rcx, %rax +; AVX512-NEXT: tzcntq %rsi, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: tzcntq %r8, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: subl $-128, %ebx +; AVX512-NEXT: movq %rcx, %rax +; AVX512-NEXT: orq %rsi, %rax +; AVX512-NEXT: cmovnel %r14d, %ebx +; AVX512-NEXT: tzcntq %r10, %rax +; AVX512-NEXT: tzcntq %r9, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: tzcntq 56(%rdi), %rax +; AVX512-NEXT: tzcntq %r11, %rdi +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: cmovnel %edi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r9, %r10 +; AVX512-NEXT: cmovnel %r14d, %eax +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r8, %rsi +; AVX512-NEXT: orq %rdx, %rcx +; AVX512-NEXT: orq %rsi, %rcx +; AVX512-NEXT: cmovnel %ebx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: retq + %a0 = load i512, ptr %p0 + %cnt = call i512 @llvm.cttz.i512(i512 %a0, i1 0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @test_cttz_i1024(i1024 %a0) nounwind { +; SSE-LABEL: test_cttz_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq %r9, %r13 +; SSE-NEXT: movq %r8, %r14 +; SSE-NEXT: movq %rcx, %rbx +; SSE-NEXT: movq %rdx, %r10 +; SSE-NEXT: movq %rsi, %r9 +; SSE-NEXT: movq 
{{[0-9]+}}(%rsp), %rsi +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; SSE-NEXT: rep bsfq %rdi, %rax +; SSE-NEXT: rep bsfq %r9, %r15 +; SSE-NEXT: addl $64, %r15d +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %eax, %r15d +; SSE-NEXT: rep bsfq %r10, %r12 +; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %r12d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: movq %rdi, %r12 +; SSE-NEXT: orq %r9, %r12 +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: rep bsfq %r8, %r15 +; SSE-NEXT: movq %r13, %rcx +; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: rep bsfq %r13, %r13 +; SSE-NEXT: addl $64, %r13d +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %r15d, %r13d +; SSE-NEXT: rep bsfq %rdx, %r12 +; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %r15 +; SSE-NEXT: addl $64, %r15d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %r12d, %r15d +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; SSE-NEXT: subl $-128, %r15d +; SSE-NEXT: movq %r8, %rbp +; SSE-NEXT: orq %rcx, %rbp +; SSE-NEXT: cmovnel %r13d, %r15d +; SSE-NEXT: addl $256, %r15d # imm = 0x100 +; SSE-NEXT: movq %r9, %r13 +; SSE-NEXT: orq %rbx, %r13 +; SSE-NEXT: movq %rdi, %rbp +; SSE-NEXT: orq %r10, %rbp +; SSE-NEXT: orq %r13, %rbp +; SSE-NEXT: cmovnel %eax, %r15d +; SSE-NEXT: rep bsfq %r11, %r13 +; SSE-NEXT: rep bsfq %r12, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovnel %r13d, %eax +; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %r13 +; SSE-NEXT: addl $64, %r13d +; SSE-NEXT: rep bsfq %rsi, %rcx +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %ecx, %r13d +; SSE-NEXT: subl $-128, %r13d +; SSE-NEXT: movq %r11, %rcx +; SSE-NEXT: orq %r12, %rcx +; SSE-NEXT: cmovnel %eax, %r13d +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; SSE-NEXT: rep bsfq %rbp, %rcx +; SSE-NEXT: addl $64, %ecx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: rep bsfq %r8, %rsi +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %esi, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rbp, %rdx +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r12 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r12, %r11 +; SSE-NEXT: cmovnel %r13d, %eax +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; SSE-NEXT: orq %rbx, %r9 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: orq %r14, %rdi +; SSE-NEXT: orq %r10, %rdi +; SSE-NEXT: addl $512, %eax # imm = 0x200 +; SSE-NEXT: orq %r9, %rdi +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: test_cttz_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq %r9, %rbx +; AVX2-NEXT: movq %r8, %r14 +; AVX2-NEXT: movq %rcx, %r11 +; AVX2-NEXT: movq %rdx, %r10 +; AVX2-NEXT: movq %rsi, %r9 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; 
AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX2-NEXT: tzcntq %rdi, %rax +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %r9, %r15 +; AVX2-NEXT: addl $64, %r15d +; AVX2-NEXT: testq %rdi, %rdi +; AVX2-NEXT: cmovnel %eax, %r15d +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: tzcntq %r10, %r12 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %r11, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: movq %rdi, %r12 +; AVX2-NEXT: orq %r9, %r12 +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %r14, %r15 +; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: tzcntq %rbx, %r12 +; AVX2-NEXT: addl $64, %r12d +; AVX2-NEXT: testq %r14, %r14 +; AVX2-NEXT: cmovnel %r15d, %r12d +; AVX2-NEXT: xorl %r13d, %r13d +; AVX2-NEXT: tzcntq %rcx, %r13 +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %rdx, %r15 +; AVX2-NEXT: addl $64, %r15d +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %r13d, %r15d +; AVX2-NEXT: subl $-128, %r15d +; AVX2-NEXT: movq %r14, %r13 +; AVX2-NEXT: orq %rbx, %r13 +; AVX2-NEXT: cmovnel %r12d, %r15d +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX2-NEXT: addl $256, %r15d # imm = 0x100 +; AVX2-NEXT: movq %r9, %r13 +; AVX2-NEXT: orq %r11, %r13 +; AVX2-NEXT: movq %rdi, %rbp +; AVX2-NEXT: orq %r10, %rbp +; AVX2-NEXT: orq %r13, %rbp +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX2-NEXT: cmovnel %eax, %r15d +; AVX2-NEXT: xorl %ebp, %ebp +; AVX2-NEXT: tzcntq %r12, %rbp +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %r13, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r12, %r12 +; AVX2-NEXT: cmovnel %ebp, %eax +; AVX2-NEXT: xorl %ebp, %ebp +; AVX2-NEXT: tzcntq %r8, %rbp +; AVX2-NEXT: addl $64, %ebp +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: tzcntq %rsi, %rcx +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %ecx, %ebp +; AVX2-NEXT: subl $-128, %ebp +; AVX2-NEXT: movq %r12, %rcx +; AVX2-NEXT: orq %r13, %rcx +; AVX2-NEXT: cmovnel %eax, %ebp +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: tzcntq %rbx, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX2-NEXT: tzcntq %r8, %rsi +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %esi, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rbx, %rdx +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r12 +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r13, %r12 +; AVX2-NEXT: cmovnel %ebp, %eax +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; AVX2-NEXT: orq %r11, %r9 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: orq %r14, %rdi +; AVX2-NEXT: orq %r10, %rdi +; AVX2-NEXT: addl $512, %eax # imm = 0x200 +; AVX2-NEXT: orq %r9, %rdi +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_cttz_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; 
AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq %r9, %r14 +; AVX512-NEXT: movq %r8, %r15 +; AVX512-NEXT: movq %rcx, %r11 +; AVX512-NEXT: movq %rdx, %r10 +; AVX512-NEXT: movq %rsi, %r9 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; AVX512-NEXT: tzcntq %rdi, %rax +; AVX512-NEXT: tzcntq %r9, %r12 +; AVX512-NEXT: addl $64, %r12d +; AVX512-NEXT: testq %rdi, %rdi +; AVX512-NEXT: cmovnel %eax, %r12d +; AVX512-NEXT: tzcntq %rdx, %r13 +; AVX512-NEXT: tzcntq %r11, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %r13d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: movq %rdi, %r13 +; AVX512-NEXT: orq %r9, %r13 +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: tzcntq %r8, %r12 +; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: tzcntq %r14, %r13 +; AVX512-NEXT: addl $64, %r13d +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %r12d, %r13d +; AVX512-NEXT: tzcntq %rcx, %rbp +; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %r12 +; AVX512-NEXT: addl $64, %r12d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %ebp, %r12d +; AVX512-NEXT: subl $-128, %r12d +; AVX512-NEXT: movq %r8, %rbp +; AVX512-NEXT: orq %r14, %rbp +; AVX512-NEXT: cmovnel %r13d, %r12d +; AVX512-NEXT: addl $256, %r12d # imm = 0x100 +; AVX512-NEXT: movq %r9, %r13 +; AVX512-NEXT: orq %r11, %r13 +; AVX512-NEXT: movq %rdi, %rbp +; AVX512-NEXT: orq %rdx, %rbp +; AVX512-NEXT: orq %r13, %rbp +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX512-NEXT: cmovnel %eax, %r12d +; AVX512-NEXT: tzcntq %rbx, %rbp +; AVX512-NEXT: tzcntq %r13, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rbx, %rbx +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %rbp +; AVX512-NEXT: addl $64, %ebp +; AVX512-NEXT: tzcntq %rsi, %rcx +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %ecx, %ebp +; AVX512-NEXT: subl $-128, %ebp +; AVX512-NEXT: movq %rbx, %rcx +; AVX512-NEXT: orq %r13, %rcx +; AVX512-NEXT: cmovnel %eax, %ebp +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; AVX512-NEXT: tzcntq %r14, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX512-NEXT: tzcntq %r8, %rsi +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %esi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r14, %rdx +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r13, %rbx +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; AVX512-NEXT: orq %r11, %r9 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: orq %r15, %rdi +; AVX512-NEXT: orq %r10, %rdi +; AVX512-NEXT: addl $512, %eax # imm = 0x200 +; AVX512-NEXT: orq %r9, %rdi +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; 
AVX512-NEXT: retq + %cnt = call i1024 @llvm.cttz.i1024(i1024 %a0, i1 0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +define i32 @load_cttz_i1024(ptr %p0) nounwind { +; SSE-LABEL: load_cttz_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq 88(%rdi), %r10 +; SSE-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 56(%rdi), %rcx +; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 40(%rdi), %rsi +; SSE-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 24(%rdi), %r9 +; SSE-NEXT: movq 16(%rdi), %r15 +; SSE-NEXT: movq (%rdi), %r8 +; SSE-NEXT: movq 8(%rdi), %r11 +; SSE-NEXT: rep bsfq %r8, %rax +; SSE-NEXT: rep bsfq %r11, %rdx +; SSE-NEXT: addl $64, %edx +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %edx +; SSE-NEXT: rep bsfq %r15, %rbx +; SSE-NEXT: rep bsfq %r9, %rax +; SSE-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r15, %r15 +; SSE-NEXT: cmovnel %ebx, %eax +; SSE-NEXT: movq 32(%rdi), %rbx +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: movq %r8, %r14 +; SSE-NEXT: orq %r11, %r14 +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: rep bsfq %rbx, %rdx +; SSE-NEXT: rep bsfq %rsi, %r12 +; SSE-NEXT: addl $64, %r12d +; SSE-NEXT: testq %rbx, %rbx +; SSE-NEXT: cmovnel %edx, %r12d +; SSE-NEXT: movq 48(%rdi), %r13 +; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: rep bsfq %r13, %rdx +; SSE-NEXT: rep bsfq %rcx, %r14 +; SSE-NEXT: addl $64, %r14d +; SSE-NEXT: testq %r13, %r13 +; SSE-NEXT: cmovnel %edx, %r14d +; SSE-NEXT: subl $-128, %r14d +; SSE-NEXT: movq %rbx, %rdx +; SSE-NEXT: orq %rsi, %rdx +; SSE-NEXT: cmovnel %r12d, %r14d +; SSE-NEXT: movq 72(%rdi), %r12 +; SSE-NEXT: addl $256, %r14d # imm = 0x100 +; SSE-NEXT: movq %r11, %rdx +; SSE-NEXT: orq %r9, %rdx +; SSE-NEXT: movq %r8, %r13 +; SSE-NEXT: orq %r15, %r13 +; SSE-NEXT: orq %rdx, %r13 +; SSE-NEXT: movq 64(%rdi), %r13 +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: rep bsfq %r13, %rdx +; SSE-NEXT: rep bsfq %r12, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r13, %r13 +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: rep bsfq %r10, %rbp +; SSE-NEXT: addl $64, %ebp +; SSE-NEXT: movq 80(%rdi), %r10 +; SSE-NEXT: rep bsfq %r10, %rcx +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %ecx, %ebp +; SSE-NEXT: subl $-128, %ebp +; SSE-NEXT: movq %r13, %rcx +; SSE-NEXT: orq %r12, %rcx +; SSE-NEXT: cmovnel %eax, %ebp +; SSE-NEXT: movq 104(%rdi), %r9 +; SSE-NEXT: rep bsfq %r9, %rcx +; SSE-NEXT: addl $64, %ecx +; SSE-NEXT: movq 96(%rdi), %rdx +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq 120(%rdi), %rax +; SSE-NEXT: movq 112(%rdi), %rdi +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: rep bsfq %rdi, %rsi +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %esi, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r9, %rdx +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload +; SSE-NEXT: orq %r10, %r13 +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r12, %r13 +; SSE-NEXT: cmovnel %ebp, %eax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; SSE-NEXT: orq %rcx, 
%r11 +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; SSE-NEXT: orq %rbx, %r8 +; SSE-NEXT: orq %r15, %r8 +; SSE-NEXT: addl $512, %eax # imm = 0x200 +; SSE-NEXT: orq %r11, %r8 +; SSE-NEXT: cmovnel %r14d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: load_cttz_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq 72(%rdi), %r14 +; AVX2-NEXT: movq 64(%rdi), %r15 +; AVX2-NEXT: movq 56(%rdi), %r9 +; AVX2-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 48(%rdi), %rcx +; AVX2-NEXT: movq 40(%rdi), %r10 +; AVX2-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 32(%rdi), %rsi +; AVX2-NEXT: movq 24(%rdi), %rbp +; AVX2-NEXT: movq 16(%rdi), %rbx +; AVX2-NEXT: movq (%rdi), %r8 +; AVX2-NEXT: movq 8(%rdi), %r11 +; AVX2-NEXT: tzcntq %r8, %rax +; AVX2-NEXT: tzcntq %r11, %rdx +; AVX2-NEXT: addl $64, %edx +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %eax, %edx +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: tzcntq %rbx, %r12 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rbp, %rax +; AVX2-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: movq %r8, %r12 +; AVX2-NEXT: orq %r11, %r12 +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: tzcntq %rsi, %rdx +; AVX2-NEXT: xorl %r13d, %r13d +; AVX2-NEXT: tzcntq %r10, %r13 +; AVX2-NEXT: addl $64, %r13d +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: cmovnel %edx, %r13d +; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: tzcntq %rcx, %rdx +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: tzcntq %r9, %r12 +; AVX2-NEXT: addl $64, %r12d +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %edx, %r12d +; AVX2-NEXT: subl $-128, %r12d +; AVX2-NEXT: movq %rsi, %rdx +; AVX2-NEXT: orq %r10, %rdx +; AVX2-NEXT: cmovnel %r13d, %r12d +; AVX2-NEXT: addl $256, %r12d # imm = 0x100 +; AVX2-NEXT: movq %r11, %rdx +; AVX2-NEXT: orq %rbp, %rdx +; AVX2-NEXT: movq %r8, %r13 +; AVX2-NEXT: orq %rbx, %r13 +; AVX2-NEXT: orq %rdx, %r13 +; AVX2-NEXT: cmovnel %eax, %r12d +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: tzcntq %r15, %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %r14, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r15, %r15 +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: movq 88(%rdi), %rbp +; AVX2-NEXT: xorl %r13d, %r13d +; AVX2-NEXT: tzcntq %rbp, %r13 +; AVX2-NEXT: addl $64, %r13d +; AVX2-NEXT: movq 80(%rdi), %r10 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: tzcntq %r10, %rcx +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %ecx, %r13d +; AVX2-NEXT: subl $-128, %r13d +; AVX2-NEXT: movq %r15, %rcx +; AVX2-NEXT: orq %r14, %rcx +; AVX2-NEXT: cmovnel %eax, %r13d +; AVX2-NEXT: movq 104(%rdi), %r9 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: tzcntq %r9, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: movq 96(%rdi), %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: movq 112(%rdi), %rsi +; 
AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq 120(%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: tzcntq %rsi, %rdi +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %edi, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %r9, %rdx +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: orq %rbp, %r14 +; AVX2-NEXT: orq %r10, %r15 +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r14, %r15 +; AVX2-NEXT: cmovnel %r13d, %eax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; AVX2-NEXT: orq %rcx, %r11 +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; AVX2-NEXT: orq %rbx, %r8 +; AVX2-NEXT: addl $512, %eax # imm = 0x200 +; AVX2-NEXT: orq %r11, %r8 +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_cttz_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq 88(%rdi), %rbp +; AVX512-NEXT: movq 72(%rdi), %r15 +; AVX512-NEXT: movq 56(%rdi), %r9 +; AVX512-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 48(%rdi), %rcx +; AVX512-NEXT: movq 40(%rdi), %r10 +; AVX512-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 32(%rdi), %rsi +; AVX512-NEXT: movq 24(%rdi), %r14 +; AVX512-NEXT: movq 16(%rdi), %rbx +; AVX512-NEXT: movq (%rdi), %r8 +; AVX512-NEXT: movq 8(%rdi), %r11 +; AVX512-NEXT: tzcntq %r8, %rax +; AVX512-NEXT: tzcntq %r11, %rdx +; AVX512-NEXT: addl $64, %edx +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %eax, %edx +; AVX512-NEXT: tzcntq %rbx, %r12 +; AVX512-NEXT: tzcntq %r14, %rax +; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rbx, %rbx +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: movq %r8, %r12 +; AVX512-NEXT: orq %r11, %r12 +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: tzcntq %rsi, %rdx +; AVX512-NEXT: tzcntq %r10, %r13 +; AVX512-NEXT: addl $64, %r13d +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: cmovnel %edx, %r13d +; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: tzcntq %rcx, %rdx +; AVX512-NEXT: tzcntq %r9, %r12 +; AVX512-NEXT: addl $64, %r12d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %edx, %r12d +; AVX512-NEXT: subl $-128, %r12d +; AVX512-NEXT: movq %rsi, %rdx +; AVX512-NEXT: orq %r10, %rdx +; AVX512-NEXT: cmovnel %r13d, %r12d +; AVX512-NEXT: addl $256, %r12d # imm = 0x100 +; AVX512-NEXT: movq %r11, %rdx +; AVX512-NEXT: orq %r14, %rdx +; AVX512-NEXT: movq %r8, %r13 +; AVX512-NEXT: orq %rbx, %r13 +; AVX512-NEXT: orq %rdx, %r13 +; AVX512-NEXT: movq 64(%rdi), %r13 +; AVX512-NEXT: cmovnel %eax, %r12d +; AVX512-NEXT: tzcntq %r13, %rdx +; AVX512-NEXT: tzcntq %r15, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r13, %r13 +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: movq %rbp, %r14 +; AVX512-NEXT: tzcntq %rbp, %rbp +; AVX512-NEXT: addl $64, 
%ebp +; AVX512-NEXT: movq 80(%rdi), %r10 +; AVX512-NEXT: tzcntq %r10, %rcx +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %ecx, %ebp +; AVX512-NEXT: subl $-128, %ebp +; AVX512-NEXT: movq %r13, %rcx +; AVX512-NEXT: orq %r15, %rcx +; AVX512-NEXT: cmovnel %eax, %ebp +; AVX512-NEXT: movq 104(%rdi), %r9 +; AVX512-NEXT: tzcntq %r9, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: movq 96(%rdi), %rdx +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: movq 112(%rdi), %rsi +; AVX512-NEXT: tzcntq 120(%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: tzcntq %rsi, %rdi +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %edi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r9, %rdx +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: orq %r14, %r15 +; AVX512-NEXT: orq %r10, %r13 +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r15, %r13 +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; AVX512-NEXT: orq %rcx, %r11 +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; AVX512-NEXT: orq %rbx, %r8 +; AVX512-NEXT: addl $512, %eax # imm = 0x200 +; AVX512-NEXT: orq %r11, %r8 +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: retq + %a0 = load i1024, ptr %p0 + %cnt = call i1024 @llvm.cttz.i1024(i1024 %a0, i1 0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} diff --git a/llvm/test/CodeGen/X86/dag-fmf-cse.ll b/llvm/test/CodeGen/X86/dag-fmf-cse.ll index 609ccdc..cdcc082 100644 --- a/llvm/test/CodeGen/X86/dag-fmf-cse.ll +++ b/llvm/test/CodeGen/X86/dag-fmf-cse.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=fma -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=fma | FileCheck %s ; If fast-math-flags are propagated correctly, the mul1 expression ; should be recognized as a factor in the last fsub, so we should diff --git a/llvm/test/CodeGen/X86/fabs.ll b/llvm/test/CodeGen/X86/fabs.ll index 82c82ac..4e6da83 100644 --- a/llvm/test/CodeGen/X86/fabs.ll +++ b/llvm/test/CodeGen/X86/fabs.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse,-sse2,-sse3 | FileCheck %s --check-prefix=X87 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=X87UNSAFE +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse,-sse2,-sse3 -enable-no-nans-fp-math | FileCheck %s --check-prefix=X87UNSAFE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 declare float @fabsf(float) diff --git a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll index 0fe107c..aae6cda 100644 --- a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll +++ b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll @@ -22,25 +22,24 @@ 
declare <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat>, <4 x bfloat>) define float @test_fmaximumnum(float %x, float %y) nounwind { ; SSE2-LABEL: test_fmaximumnum: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: js .LBB0_2 -; SSE2-NEXT: # %bb.1: +; SSE2-NEXT: js .LBB0_1 +; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: jmp .LBB0_3 +; SSE2-NEXT: .LBB0_1: +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: .LBB0_3: ; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: .LBB0_2: -; SSE2-NEXT: movdqa %xmm3, %xmm0 -; SSE2-NEXT: cmpordss %xmm3, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: andps %xmm3, %xmm4 -; SSE2-NEXT: js .LBB0_4 -; SSE2-NEXT: # %bb.3: -; SSE2-NEXT: movdqa %xmm2, %xmm1 -; SSE2-NEXT: .LBB0_4: -; SSE2-NEXT: maxss %xmm1, %xmm3 -; SSE2-NEXT: andnps %xmm3, %xmm0 -; SSE2-NEXT: orps %xmm4, %xmm0 +; SSE2-NEXT: maxss %xmm2, %xmm3 +; SSE2-NEXT: movaps %xmm3, %xmm0 +; SSE2-NEXT: cmpunordss %xmm3, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm3, %xmm2 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fmaximumnum: @@ -56,7 +55,7 @@ define float @test_fmaximumnum(float %x, float %y) nounwind { ; AVX1-NEXT: vmovdqa %xmm0, %xmm1 ; AVX1-NEXT: .LBB0_3: ; AVX1-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX1-NEXT: vcmpordss %xmm1, %xmm1, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; @@ -70,7 +69,7 @@ define float @test_fmaximumnum(float %x, float %y) nounwind { ; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX512-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: retq ; @@ -95,7 +94,7 @@ define float @test_fmaximumnum(float %x, float %y) nounwind { ; X86-NEXT: vmovdqa %xmm2, %xmm0 ; X86-NEXT: .LBB0_3: ; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -371,26 +370,25 @@ define float @test_fmaximumnum_nsz(float %x, float %y) "no-signed-zeros-fp-math" ; SSE2-LABEL: test_fmaximumnum_nsz: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: cmpordss %xmm0, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm3 -; SSE2-NEXT: andps %xmm0, %xmm3 -; SSE2-NEXT: maxss %xmm1, %xmm0 -; SSE2-NEXT: andnps %xmm0, %xmm2 -; SSE2-NEXT: orps %xmm3, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm1 +; SSE2-NEXT: cmpunordss %xmm2, %xmm1 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fmaximumnum_nsz: ; AVX1: # %bb.0: ; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: test_fmaximumnum_nsz: ; AVX512: # %bb.0: ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; AVX512-NEXT: vcmpordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vcmpunordss %xmm1, %xmm1, %k1 ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq @@ -404,9 
+402,9 @@ define float @test_fmaximumnum_nsz(float %x, float %y) "no-signed-zeros-fp-math" ; X86: # %bb.0: ; X86-NEXT: pushl %eax ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm1 -; X86-NEXT: vmaxss {{[0-9]+}}(%esp), %xmm0, %xmm2 -; X86-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 +; X86-NEXT: vmaxss {{[0-9]+}}(%esp), %xmm0, %xmm1 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) ; X86-NEXT: popl %eax @@ -421,23 +419,22 @@ define float @test_fmaximumnum_combine_cmps(float %x, float %y) nounwind { ; SSE2-NEXT: divss %xmm0, %xmm1 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: js .LBB9_2 -; SSE2-NEXT: # %bb.1: -; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: .LBB9_2: -; SSE2-NEXT: movaps %xmm3, %xmm2 -; SSE2-NEXT: cmpordss %xmm3, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm4 -; SSE2-NEXT: andps %xmm3, %xmm4 -; SSE2-NEXT: js .LBB9_4 -; SSE2-NEXT: # %bb.3: +; SSE2-NEXT: js .LBB9_1 +; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: jmp .LBB9_3 +; SSE2-NEXT: .LBB9_1: +; SSE2-NEXT: movaps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: .LBB9_4: -; SSE2-NEXT: maxss %xmm1, %xmm3 +; SSE2-NEXT: .LBB9_3: +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: maxss %xmm2, %xmm3 +; SSE2-NEXT: movaps %xmm3, %xmm0 +; SSE2-NEXT: cmpunordss %xmm3, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm2 ; SSE2-NEXT: andnps %xmm3, %xmm2 -; SSE2-NEXT: orps %xmm4, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fmaximumnum_combine_cmps: @@ -454,7 +451,7 @@ define float @test_fmaximumnum_combine_cmps(float %x, float %y) nounwind { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: .LBB9_3: ; AVX1-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX1-NEXT: vcmpordss %xmm1, %xmm1, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; @@ -469,7 +466,7 @@ define float @test_fmaximumnum_combine_cmps(float %x, float %y) nounwind { ; AVX512F-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} ; AVX512F-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512F-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX512F-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512F-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512F-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512F-NEXT: retq ; @@ -507,7 +504,7 @@ define float @test_fmaximumnum_combine_cmps(float %x, float %y) nounwind { ; X86-NEXT: vmovaps %xmm1, %xmm0 ; X86-NEXT: .LBB9_3: ; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -527,23 +524,23 @@ define float @test_fminimumnum(float %x, float %y) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: js .LBB10_2 -; SSE2-NEXT: # %bb.1: -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: .LBB10_2: -; SSE2-NEXT: movdqa %xmm3, %xmm2 -; SSE2-NEXT: cmpordss %xmm3, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm4 -; SSE2-NEXT: andps %xmm3, %xmm4 -; SSE2-NEXT: js .LBB10_4 -; SSE2-NEXT: # %bb.3: +; SSE2-NEXT: js .LBB10_1 +; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: jmp .LBB10_3 +; SSE2-NEXT: .LBB10_1: +; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: 
movdqa %xmm1, %xmm0 -; SSE2-NEXT: .LBB10_4: -; SSE2-NEXT: minss %xmm0, %xmm3 +; SSE2-NEXT: .LBB10_3: +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: minss %xmm2, %xmm3 +; SSE2-NEXT: movaps %xmm3, %xmm1 +; SSE2-NEXT: cmpunordss %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm2 ; SSE2-NEXT: andnps %xmm3, %xmm2 -; SSE2-NEXT: orps %xmm4, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: orps %xmm2, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fminimumnum: @@ -559,7 +556,7 @@ define float @test_fminimumnum(float %x, float %y) nounwind { ; AVX1-NEXT: vmovdqa %xmm1, %xmm0 ; AVX1-NEXT: .LBB10_3: ; AVX1-NEXT: vminss %xmm2, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; @@ -573,7 +570,7 @@ define float @test_fminimumnum(float %x, float %y) nounwind { ; AVX512-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: vminss %xmm2, %xmm0, %xmm1 -; AVX512-NEXT: vcmpordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vcmpunordss %xmm1, %xmm1, %k1 ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq @@ -599,7 +596,7 @@ define float @test_fminimumnum(float %x, float %y) nounwind { ; X86-NEXT: vmovdqa %xmm1, %xmm0 ; X86-NEXT: .LBB10_3: ; X86-NEXT: vminss %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -857,26 +854,25 @@ define float @test_fminimumnum_nsz(float %x, float %y) nounwind { ; SSE2-LABEL: test_fminimumnum_nsz: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: cmpordss %xmm0, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm3 -; SSE2-NEXT: andps %xmm0, %xmm3 -; SSE2-NEXT: minss %xmm1, %xmm0 -; SSE2-NEXT: andnps %xmm0, %xmm2 -; SSE2-NEXT: orps %xmm3, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: minss %xmm1, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm1 +; SSE2-NEXT: cmpunordss %xmm2, %xmm1 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fminimumnum_nsz: ; AVX1: # %bb.0: ; AVX1-NEXT: vminss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: test_fminimumnum_nsz: ; AVX512: # %bb.0: ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm1 -; AVX512-NEXT: vcmpordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vcmpunordss %xmm1, %xmm1, %k1 ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq @@ -890,9 +886,9 @@ define float @test_fminimumnum_nsz(float %x, float %y) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %eax ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm1 -; X86-NEXT: vminss {{[0-9]+}}(%esp), %xmm0, %xmm2 -; X86-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 +; X86-NEXT: vminss {{[0-9]+}}(%esp), %xmm0, %xmm1 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) ; X86-NEXT: popl %eax @@ -907,23 +903,23 @@ define float @test_fminimumnum_combine_cmps(float %x, float %y) nounwind { ; SSE2-NEXT: divss %xmm0, %xmm1 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: 
testl %eax, %eax -; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: js .LBB19_2 -; SSE2-NEXT: # %bb.1: -; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: .LBB19_2: -; SSE2-NEXT: movaps %xmm3, %xmm2 -; SSE2-NEXT: cmpordss %xmm3, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm4 -; SSE2-NEXT: andps %xmm3, %xmm4 -; SSE2-NEXT: js .LBB19_4 -; SSE2-NEXT: # %bb.3: +; SSE2-NEXT: js .LBB19_1 +; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: jmp .LBB19_3 +; SSE2-NEXT: .LBB19_1: +; SSE2-NEXT: movaps %xmm0, %xmm2 ; SSE2-NEXT: movaps %xmm1, %xmm0 -; SSE2-NEXT: .LBB19_4: -; SSE2-NEXT: minss %xmm0, %xmm3 +; SSE2-NEXT: .LBB19_3: +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: minss %xmm2, %xmm3 +; SSE2-NEXT: movaps %xmm3, %xmm1 +; SSE2-NEXT: cmpunordss %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm2 ; SSE2-NEXT: andnps %xmm3, %xmm2 -; SSE2-NEXT: orps %xmm4, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: orps %xmm2, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fminimumnum_combine_cmps: @@ -940,7 +936,7 @@ define float @test_fminimumnum_combine_cmps(float %x, float %y) nounwind { ; AVX1-NEXT: vmovaps %xmm2, %xmm0 ; AVX1-NEXT: .LBB19_3: ; AVX1-NEXT: vminss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; @@ -955,7 +951,7 @@ define float @test_fminimumnum_combine_cmps(float %x, float %y) nounwind { ; AVX512F-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} ; AVX512F-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512F-NEXT: vminss %xmm2, %xmm0, %xmm1 -; AVX512F-NEXT: vcmpordss %xmm0, %xmm0, %k1 +; AVX512F-NEXT: vcmpunordss %xmm1, %xmm1, %k1 ; AVX512F-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512F-NEXT: vmovaps %xmm1, %xmm0 ; AVX512F-NEXT: retq @@ -994,7 +990,7 @@ define float @test_fminimumnum_combine_cmps(float %x, float %y) nounwind { ; X86-NEXT: vmovaps %xmm2, %xmm0 ; X86-NEXT: .LBB19_3: ; X86-NEXT: vminss %xmm1, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -1022,9 +1018,9 @@ define <2 x double> @test_fminimumnum_vector(<2 x double> %x, <2 x double> %y) { ; SSE2-NEXT: por %xmm4, %xmm3 ; SSE2-NEXT: movdqa %xmm3, %xmm1 ; SSE2-NEXT: minpd %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm3, %xmm0 -; SSE2-NEXT: cmpordpd %xmm3, %xmm0 -; SSE2-NEXT: andpd %xmm0, %xmm3 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: cmpunordpd %xmm1, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm3 ; SSE2-NEXT: andnpd %xmm1, %xmm0 ; SSE2-NEXT: orpd %xmm3, %xmm0 ; SSE2-NEXT: retq @@ -1034,7 +1030,7 @@ define <2 x double> @test_fminimumnum_vector(<2 x double> %x, <2 x double> %y) { ; AVX-NEXT: vblendvpd %xmm0, %xmm0, %xmm1, %xmm2 ; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vminpd %xmm2, %xmm0, %xmm1 -; AVX-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vcmpunordpd %xmm1, %xmm1, %xmm2 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; @@ -1048,7 +1044,7 @@ define <2 x double> @test_fminimumnum_vector(<2 x double> %x, <2 x double> %y) { ; X86-NEXT: vblendvpd %xmm0, %xmm0, %xmm1, %xmm2 ; X86-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm0 ; X86-NEXT: vminpd %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordpd %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: retl %r = call <2 x double> 
@llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y) @@ -1084,19 +1080,17 @@ define <2 x double> @test_fminimumnum_vector_zero(<2 x double> %x) { ; SSE2: # %bb.0: ; SSE2-NEXT: xorpd %xmm1, %xmm1 ; SSE2-NEXT: minpd %xmm0, %xmm1 -; SSE2-NEXT: movapd %xmm0, %xmm2 -; SSE2-NEXT: cmpordpd %xmm0, %xmm2 -; SSE2-NEXT: andpd %xmm2, %xmm0 -; SSE2-NEXT: andnpd %xmm1, %xmm2 -; SSE2-NEXT: orpd %xmm2, %xmm0 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: cmpunordpd %xmm1, %xmm0 +; SSE2-NEXT: andnpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: test_fminimumnum_vector_zero: ; AVX: # %bb.0: ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; ; AVX10_2-LABEL: test_fminimumnum_vector_zero: @@ -1108,9 +1102,9 @@ define <2 x double> @test_fminimumnum_vector_zero(<2 x double> %x) { ; X86-LABEL: test_fminimumnum_vector_zero: ; X86: # %bb.0: ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; X86-NEXT: vminpd %xmm0, %xmm1, %xmm1 -; X86-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 -; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; X86-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; X86-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm1 +; X86-NEXT: vandnpd %xmm0, %xmm1, %xmm0 ; X86-NEXT: retl %r = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> <double 0., double 0.>) ret <2 x double> %r @@ -1120,20 +1114,21 @@ define <4 x float> @test_fmaximumnum_vector_signed_zero(<4 x float> %x) { ; SSE2-LABEL: test_fmaximumnum_vector_signed_zero: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; SSE2-NEXT: maxps %xmm0, %xmm1 -; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: cmpordps %xmm0, %xmm2 -; SSE2-NEXT: andps %xmm2, %xmm0 -; SSE2-NEXT: andnps %xmm1, %xmm2 -; SSE2-NEXT: orps %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: maxps %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordps %xmm2, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: test_fmaximumnum_vector_signed_zero: ; AVX: # %bb.0: ; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX10_2-LABEL: test_fmaximumnum_vector_signed_zero: @@ -1144,9 +1139,9 @@ define <4 x float> @test_fmaximumnum_vector_signed_zero(<4 x float> %x) { ; X86-LABEL: test_fmaximumnum_vector_signed_zero: ; X86: # %bb.0: ; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; X86-NEXT: vmaxps %xmm0, %xmm1, %xmm1 -; X86-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 -; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; X86-NEXT: vmaxps %xmm0, %xmm1, %xmm0 +; X86-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2 +; X86-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; X86-NEXT: retl %r = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> <float -0., float -0., float -0., float -0.>) ret <4 x float> %r @@ -1155,13 +1150,14 @@ define <4 x float> @test_fmaximumnum_vector_signed_zero(<4 x float> %x) { define <2 x double> @test_fminimumnum_vector_partially_zero(<2 x double> %x) { ; 
SSE2-LABEL: test_fminimumnum_vector_partially_zero: ; SSE2: # %bb.0: -; SSE2-NEXT: movapd %xmm0, %xmm1 -; SSE2-NEXT: cmpordpd %xmm0, %xmm1 -; SSE2-NEXT: xorpd %xmm2, %xmm2 -; SSE2-NEXT: movhpd {{.*#+}} xmm2 = xmm2[0],mem[0] +; SSE2-NEXT: xorpd %xmm1, %xmm1 +; SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; SSE2-NEXT: movapd %xmm1, %xmm2 ; SSE2-NEXT: minpd %xmm0, %xmm2 -; SSE2-NEXT: andpd %xmm1, %xmm0 -; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: cmpunordpd %xmm2, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm1 +; SSE2-NEXT: andnpd %xmm2, %xmm0 ; SSE2-NEXT: orpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -1169,9 +1165,9 @@ define <2 x double> @test_fminimumnum_vector_partially_zero(<2 x double> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX10_2-LABEL: test_fminimumnum_vector_partially_zero: @@ -1185,9 +1181,9 @@ define <2 x double> @test_fminimumnum_vector_partially_zero(<2 x double> %x) { ; X86: # %bb.0: ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; X86-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; X86-NEXT: vminpd %xmm0, %xmm1, %xmm1 -; X86-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 -; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; X86-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; X86-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm2 +; X86-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 ; X86-NEXT: retl %r = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> <double 0., double 5.>) ret <2 x double> %r @@ -1212,9 +1208,9 @@ define <2 x double> @test_fminimumnum_vector_different_zeros(<2 x double> %x) { ; SSE2-NEXT: por %xmm2, %xmm3 ; SSE2-NEXT: movdqa %xmm3, %xmm1 ; SSE2-NEXT: minpd %xmm4, %xmm1 -; SSE2-NEXT: movdqa %xmm3, %xmm0 -; SSE2-NEXT: cmpordpd %xmm3, %xmm0 -; SSE2-NEXT: andpd %xmm0, %xmm3 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: cmpunordpd %xmm1, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm3 ; SSE2-NEXT: andnpd %xmm1, %xmm0 ; SSE2-NEXT: orpd %xmm3, %xmm0 ; SSE2-NEXT: retq @@ -1226,7 +1222,7 @@ define <2 x double> @test_fminimumnum_vector_different_zeros(<2 x double> %x) { ; AVX-NEXT: vblendvpd %xmm0, %xmm0, %xmm1, %xmm2 ; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vminpd %xmm2, %xmm0, %xmm1 -; AVX-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vcmpunordpd %xmm1, %xmm1, %xmm2 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; @@ -1244,7 +1240,7 @@ define <2 x double> @test_fminimumnum_vector_different_zeros(<2 x double> %x) { ; X86-NEXT: vblendvpd %xmm0, %xmm0, %xmm1, %xmm2 ; X86-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm0 ; X86-NEXT: vminpd %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordpd %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: retl %r = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> <double 0., double -0.>) @@ -1278,20 +1274,24 @@ define <4 x float> @test_fmaximumnum_vector_non_zero(<4 x float> %x) { define <2 x double> @test_fminimumnum_vector_nan(<2 x double> %x) { ; SSE2-LABEL: test_fminimumnum_vector_nan: ; SSE2: # %bb.0: -; SSE2-NEXT: xorpd %xmm2, %xmm2 ; SSE2-NEXT: xorpd %xmm1, %xmm1 ; SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; SSE2-NEXT: minpd %xmm0, %xmm1 -; 
SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] -; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: movapd %xmm1, %xmm2 +; SSE2-NEXT: minpd %xmm0, %xmm2 +; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: cmpunordpd %xmm2, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm1 +; SSE2-NEXT: andnpd %xmm2, %xmm0 +; SSE2-NEXT: orpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: test_fminimumnum_vector_nan: ; AVX: # %bb.0: ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vmovhpd {{.*#+}} xmm2 = xmm1[0],mem[0] -; AVX-NEXT: vminpd %xmm0, %xmm2, %xmm0 -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX10_2-LABEL: test_fminimumnum_vector_nan: @@ -1306,7 +1306,7 @@ define <2 x double> @test_fminimumnum_vector_nan(<2 x double> %x) { ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; X86-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] ; X86-NEXT: vminpd %xmm0, %xmm1, %xmm0 -; X86-NEXT: vcmpordpd %xmm1, %xmm1, %xmm2 +; X86-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm2 ; X86-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 ; X86-NEXT: retl %r = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> <double 0., double 0x7fff000000000000>) @@ -1318,19 +1318,17 @@ define <2 x double> @test_fminimumnum_vector_zero_first(<2 x double> %x) { ; SSE2: # %bb.0: ; SSE2-NEXT: xorpd %xmm1, %xmm1 ; SSE2-NEXT: minpd %xmm0, %xmm1 -; SSE2-NEXT: movapd %xmm0, %xmm2 -; SSE2-NEXT: cmpordpd %xmm0, %xmm2 -; SSE2-NEXT: andpd %xmm2, %xmm0 -; SSE2-NEXT: andnpd %xmm1, %xmm2 -; SSE2-NEXT: orpd %xmm2, %xmm0 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: cmpunordpd %xmm1, %xmm0 +; SSE2-NEXT: andnpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: test_fminimumnum_vector_zero_first: ; AVX: # %bb.0: ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; ; AVX10_2-LABEL: test_fminimumnum_vector_zero_first: @@ -1342,9 +1340,9 @@ define <2 x double> @test_fminimumnum_vector_zero_first(<2 x double> %x) { ; X86-LABEL: test_fminimumnum_vector_zero_first: ; X86: # %bb.0: ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; X86-NEXT: vminpd %xmm0, %xmm1, %xmm1 -; X86-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 -; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; X86-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; X86-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm1 +; X86-NEXT: vandnpd %xmm0, %xmm1, %xmm0 ; X86-NEXT: retl %r = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> <double 0., double 0.>, <2 x double> %x) ret <2 x double> %r @@ -1378,20 +1376,21 @@ define <4 x float> @test_fmaximumnum_vector_signed_zero_first(<4 x float> %x) { ; SSE2-LABEL: test_fmaximumnum_vector_signed_zero_first: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; SSE2-NEXT: maxps %xmm0, %xmm1 -; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: cmpordps %xmm0, %xmm2 -; SSE2-NEXT: andps %xmm2, %xmm0 -; SSE2-NEXT: andnps %xmm1, %xmm2 -; SSE2-NEXT: orps %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: maxps %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordps %xmm2, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: 
test_fmaximumnum_vector_signed_zero_first: ; AVX: # %bb.0: ; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX10_2-LABEL: test_fmaximumnum_vector_signed_zero_first: @@ -1402,9 +1401,9 @@ define <4 x float> @test_fmaximumnum_vector_signed_zero_first(<4 x float> %x) { ; X86-LABEL: test_fmaximumnum_vector_signed_zero_first: ; X86: # %bb.0: ; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; X86-NEXT: vmaxps %xmm0, %xmm1, %xmm1 -; X86-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 -; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; X86-NEXT: vmaxps %xmm0, %xmm1, %xmm0 +; X86-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2 +; X86-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; X86-NEXT: retl %r = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> <float -0., float -0., float -0., float -0.>, <4 x float> %x) ret <4 x float> %r @@ -1455,11 +1454,11 @@ define <4 x float> @test_fmaximumnum_v4f32_splat(<4 x float> %x, float %y) { ; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: maxps %xmm4, %xmm1 -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: cmpordps %xmm0, %xmm2 -; SSE2-NEXT: andps %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: cmpunordps %xmm1, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm0 ; SSE2-NEXT: andnps %xmm1, %xmm2 -; SSE2-NEXT: orps %xmm2, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fmaximumnum_v4f32_splat: @@ -1468,7 +1467,7 @@ define <4 x float> @test_fmaximumnum_v4f32_splat(<4 x float> %x, float %y) { ; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2 ; AVX1-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vmaxps %xmm2, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordps %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; @@ -1478,7 +1477,7 @@ define <4 x float> @test_fmaximumnum_v4f32_splat(<4 x float> %x, float %y) { ; AVX512-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2 ; AVX512-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vmaxps %xmm2, %xmm0, %xmm1 -; AVX512-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 +; AVX512-NEXT: vcmpunordps %xmm1, %xmm1, %xmm2 ; AVX512-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq ; @@ -1494,7 +1493,7 @@ define <4 x float> @test_fmaximumnum_v4f32_splat(<4 x float> %x, float %y) { ; X86-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2 ; X86-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmaxps %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordps %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: retl %splatinsert = insertelement <4 x float> poison, float %y, i64 0 @@ -1506,134 +1505,130 @@ define <4 x float> @test_fmaximumnum_v4f32_splat(<4 x float> %x, float %y) { define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind { ; SSE2-LABEL: test_fmaximumnum_v4f16: ; SSE2: # %bb.0: -; SSE2-NEXT: subq $104, %rsp -; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: subq $136, %rsp +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] +; SSE2-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movaps %xmm1, %xmm2 +; 
SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[1,1] +; SSE2-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE2-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: psrld $16, %xmm0 ; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill -; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: psrld $16, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 4-byte Folded Reload -; SSE2-NEXT: # xmm4 = mem[0],zero,zero,zero -; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: js .LBB33_2 -; SSE2-NEXT: # %bb.1: -; SSE2-NEXT: movdqa %xmm4, %xmm2 -; SSE2-NEXT: .LBB33_2: -; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: cmpordss %xmm2, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: andps %xmm2, %xmm3 -; SSE2-NEXT: js .LBB33_4 -; SSE2-NEXT: # %bb.3: -; SSE2-NEXT: movdqa %xmm1, %xmm4 -; SSE2-NEXT: .LBB33_4: -; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: maxss %xmm4, %xmm2 -; SSE2-NEXT: andnps %xmm2, %xmm0 -; SSE2-NEXT: orps %xmm3, %xmm0 +; SSE2-NEXT: js .LBB33_1 +; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload +; SSE2-NEXT: jmp .LBB33_3 +; SSE2-NEXT: .LBB33_1: +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: .LBB33_3: +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: psrlq $48, %xmm0 +; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: psrlq $48, %xmm0 +; SSE2-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm3, %xmm2 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: andps %xmm3, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: callq __truncsfhf2@PLT ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT -; 
SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 4-byte Folded Reload -; SSE2-NEXT: # xmm4 = mem[0],zero,zero,zero -; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: js .LBB33_6 +; SSE2-NEXT: js .LBB33_4 ; SSE2-NEXT: # %bb.5: -; SSE2-NEXT: movdqa %xmm4, %xmm2 -; SSE2-NEXT: .LBB33_6: -; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: cmpordss %xmm2, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: andps %xmm2, %xmm3 -; SSE2-NEXT: js .LBB33_8 -; SSE2-NEXT: # %bb.7: -; SSE2-NEXT: movdqa %xmm1, %xmm4 -; SSE2-NEXT: .LBB33_8: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload +; SSE2-NEXT: jmp .LBB33_6 +; SSE2-NEXT: .LBB33_4: ; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; SSE2-NEXT: psrlq $48, %xmm1 -; SSE2-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload -; SSE2-NEXT: psrlq $48, %xmm1 -; SSE2-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill -; SSE2-NEXT: maxss %xmm4, %xmm2 -; SSE2-NEXT: andnps %xmm2, %xmm0 -; SSE2-NEXT: orps %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: .LBB33_6: +; SSE2-NEXT: movdqa %xmm3, %xmm2 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: andps %xmm3, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: callq __truncsfhf2@PLT ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 4-byte Folded Reload -; SSE2-NEXT: # xmm4 = mem[0],zero,zero,zero ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: js .LBB33_10 -; SSE2-NEXT: # %bb.9: -; SSE2-NEXT: movdqa %xmm4, %xmm2 -; SSE2-NEXT: .LBB33_10: -; SSE2-NEXT: movdqa %xmm2, %xmm1 -; SSE2-NEXT: cmpordss %xmm2, %xmm1 -; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: andps %xmm2, %xmm3 -; SSE2-NEXT: js .LBB33_12 -; SSE2-NEXT: # %bb.11: -; SSE2-NEXT: movdqa %xmm0, %xmm4 -; SSE2-NEXT: .LBB33_12: -; SSE2-NEXT: maxss %xmm4, %xmm2 +; SSE2-NEXT: js .LBB33_7 +; SSE2-NEXT: # %bb.8: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload +; SSE2-NEXT: jmp .LBB33_9 +; SSE2-NEXT: .LBB33_7: +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: .LBB33_9: +; SSE2-NEXT: movdqa %xmm3, %xmm2 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: andnps %xmm2, %xmm1 -; SSE2-NEXT: orps %xmm3, %xmm1 -; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: andps %xmm3, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: callq __truncsfhf2@PLT ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movd (%rsp), %xmm4 # 4-byte Folded Reload -; SSE2-NEXT: # xmm4 = mem[0],zero,zero,zero -; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: js .LBB33_14 -; SSE2-NEXT: # %bb.13: -; SSE2-NEXT: movdqa %xmm4, %xmm2 -; SSE2-NEXT: .LBB33_14: -; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: cmpordss %xmm2, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: andps %xmm2, %xmm3 -; SSE2-NEXT: js .LBB33_16 -; SSE2-NEXT: # %bb.15: -; SSE2-NEXT: movdqa %xmm1, %xmm4 -; SSE2-NEXT: .LBB33_16: -; SSE2-NEXT: maxss %xmm4, %xmm2 -; SSE2-NEXT: andnps %xmm2, %xmm0 -; SSE2-NEXT: orps %xmm3, %xmm0 +; SSE2-NEXT: js .LBB33_10 +; SSE2-NEXT: # %bb.11: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: movdqa (%rsp), %xmm3 # 16-byte Reload +; SSE2-NEXT: jmp .LBB33_12 +; SSE2-NEXT: .LBB33_10: +; SSE2-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: .LBB33_12: +; SSE2-NEXT: movdqa %xmm3, %xmm2 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: andps %xmm3, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: callq __truncsfhf2@PLT ; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] @@ -1641,7 +1636,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; SSE2-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; SSE2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: addq $104, %rsp +; SSE2-NEXT: addq $136, %rsp ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fmaximumnum_v4f16: @@ -1679,7 +1674,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 ; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX1-NEXT: vmaxss %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: vcmpordss %xmm2, %xmm2, %xmm1 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 ; AVX1-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: callq __truncsfhf2@PLT ; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1700,7 +1695,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX1-NEXT: vmovdqa %xmm0, %xmm2 ; AVX1-NEXT: .LBB33_6: ; AVX1-NEXT: vmaxss %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: vcmpordss %xmm2, %xmm2, %xmm1 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 ; AVX1-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: callq __truncsfhf2@PLT ; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1721,7 +1716,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX1-NEXT: vmovdqa %xmm0, %xmm2 ; AVX1-NEXT: .LBB33_9: ; AVX1-NEXT: vmaxss %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: vcmpordss %xmm2, %xmm2, %xmm1 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 ; AVX1-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: callq __truncsfhf2@PLT ; AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill @@ -1742,7 +1737,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX1-NEXT: vmovdqa %xmm0, %xmm2 ; AVX1-NEXT: .LBB33_12: ; AVX1-NEXT: vmaxss 
%xmm1, %xmm2, %xmm0 -; AVX1-NEXT: vcmpordss %xmm2, %xmm2, %xmm1 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 ; AVX1-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: callq __truncsfhf2@PLT ; AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload @@ -1768,7 +1763,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} ; AVX512-NEXT: vmaxss %xmm4, %xmm3, %xmm2 -; AVX512-NEXT: vcmpordss %xmm3, %xmm3, %k1 +; AVX512-NEXT: vcmpunordss %xmm2, %xmm2, %k1 ; AVX512-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm2, %xmm2 ; AVX512-NEXT: vshufps {{.*#+}} xmm3 = xmm0[3,3,3,3] @@ -1783,7 +1778,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm4, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vmaxss %xmm5, %xmm4, %xmm3 -; AVX512-NEXT: vcmpordss %xmm4, %xmm4, %k1 +; AVX512-NEXT: vcmpunordss %xmm3, %xmm3, %k1 ; AVX512-NEXT: vmovss %xmm4, %xmm3, %xmm3 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm3, %xmm3 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] @@ -1799,7 +1794,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm4, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vmaxss %xmm5, %xmm4, %xmm3 -; AVX512-NEXT: vcmpordss %xmm4, %xmm4, %k1 +; AVX512-NEXT: vcmpunordss %xmm3, %xmm3, %k1 ; AVX512-NEXT: vmovss %xmm4, %xmm3, %xmm3 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm3, %xmm3 ; AVX512-NEXT: vshufpd {{.*#+}} xmm4 = xmm0[1,0] @@ -1814,7 +1809,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm5, %xmm6, %xmm6 {%k1} ; AVX512-NEXT: vmovss %xmm4, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: vmaxss %xmm6, %xmm5, %xmm4 -; AVX512-NEXT: vcmpordss %xmm5, %xmm5, %k1 +; AVX512-NEXT: vcmpunordss %xmm4, %xmm4, %k1 ; AVX512-NEXT: vmovss %xmm5, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm4, %xmm4 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] @@ -1831,7 +1826,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm4, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vmaxss %xmm5, %xmm4, %xmm3 -; AVX512-NEXT: vcmpordss %xmm4, %xmm4, %k1 +; AVX512-NEXT: vcmpunordss %xmm3, %xmm3, %k1 ; AVX512-NEXT: vmovss %xmm4, %xmm3, %xmm3 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm3, %xmm3 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] @@ -1846,7 +1841,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm5, %xmm6, %xmm6 {%k1} ; AVX512-NEXT: vmovss %xmm4, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: vmaxss %xmm6, %xmm5, %xmm4 -; AVX512-NEXT: vcmpordss %xmm5, %xmm5, %k1 +; AVX512-NEXT: vcmpunordss %xmm4, %xmm4, %k1 ; AVX512-NEXT: vmovss %xmm5, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm4, %xmm4 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] @@ -1860,7 +1855,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm5, %xmm6, %xmm6 {%k1} ; AVX512-NEXT: vmovss %xmm4, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: vmaxss %xmm6, %xmm5, %xmm4 -; AVX512-NEXT: vcmpordss %xmm5, %xmm5, %k1 +; AVX512-NEXT: 
vcmpunordss %xmm4, %xmm4, %k1 ; AVX512-NEXT: vmovss %xmm5, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm4, %xmm4 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0 @@ -1875,7 +1870,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm1, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmaxss %xmm5, %xmm1, %xmm0 -; AVX512-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] @@ -1933,7 +1928,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; X86-NEXT: vmovdqa %xmm1, %xmm0 ; X86-NEXT: .LBB33_3: ; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-NEXT: calll __extendhfsf2 @@ -1955,7 +1950,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; X86-NEXT: vmovdqa %xmm1, %xmm0 ; X86-NEXT: .LBB33_6: ; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-NEXT: calll __truncsfhf2 @@ -1993,7 +1988,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; X86-NEXT: vmovdqa %xmm1, %xmm0 ; X86-NEXT: .LBB33_9: ; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-NEXT: calll __extendhfsf2 @@ -2015,7 +2010,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; X86-NEXT: vmovdqa %xmm1, %xmm0 ; X86-NEXT: .LBB33_12: ; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-NEXT: calll __truncsfhf2 @@ -2041,120 +2036,114 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %r15 ; SSE2-NEXT: pushq %r14 +; SSE2-NEXT: pushq %r13 +; SSE2-NEXT: pushq %r12 ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: subq $56, %rsp -; SSE2-NEXT: pextrw $0, %xmm1, %r14d -; SSE2-NEXT: pextrw $0, %xmm0, %r15d -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: psrld $16, %xmm2 -; SSE2-NEXT: pextrw $0, %xmm2, %eax ; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: psrld $16, %xmm2 -; SSE2-NEXT: pextrw $0, %xmm2, %ecx +; SSE2-NEXT: psrlq $48, %xmm2 +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: psrlq $48, %xmm3 +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[1,1] +; SSE2-NEXT: pextrw $0, %xmm4, %ebp +; SSE2-NEXT: movdqa %xmm1, %xmm4 +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[1,1] +; SSE2-NEXT: pextrw $0, %xmm4, %r15d +; SSE2-NEXT: pextrw $0, %xmm0, %r12d +; SSE2-NEXT: pextrw $0, %xmm1, %r13d +; SSE2-NEXT: psrld $16, %xmm0 +; SSE2-NEXT: pextrw $0, %xmm0, %eax +; SSE2-NEXT: psrld $16, %xmm1 +; SSE2-NEXT: pextrw $0, %xmm1, %ecx ; 
SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: movd %ecx, %xmm3 +; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: shll $16, %eax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: testl %ecx, %ecx -; SSE2-NEXT: movdqa %xmm3, %xmm7 -; SSE2-NEXT: js .LBB34_2 -; SSE2-NEXT: # %bb.1: -; SSE2-NEXT: movdqa %xmm2, %xmm7 -; SSE2-NEXT: .LBB34_2: -; SSE2-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: movdqa %xmm1, %xmm5 -; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[1,1] -; SSE2-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill -; SSE2-NEXT: movdqa %xmm0, %xmm6 -; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[1,1],xmm0[1,1] -; SSE2-NEXT: movdqa %xmm7, %xmm0 -; SSE2-NEXT: cmpordss %xmm7, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: andps %xmm7, %xmm4 -; SSE2-NEXT: js .LBB34_4 -; SSE2-NEXT: # %bb.3: -; SSE2-NEXT: movdqa %xmm3, %xmm2 -; SSE2-NEXT: .LBB34_4: -; SSE2-NEXT: pextrw $0, %xmm5, %ebp -; SSE2-NEXT: pextrw $0, %xmm6, %ebx -; SSE2-NEXT: maxss %xmm2, %xmm7 -; SSE2-NEXT: andnps %xmm7, %xmm0 -; SSE2-NEXT: orps %xmm4, %xmm0 +; SSE2-NEXT: movd %eax, %xmm4 +; SSE2-NEXT: js .LBB34_1 +; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: movdqa %xmm4, %xmm0 +; SSE2-NEXT: jmp .LBB34_3 +; SSE2-NEXT: .LBB34_1: +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: movdqa %xmm4, %xmm1 +; SSE2-NEXT: .LBB34_3: +; SSE2-NEXT: pextrw $0, %xmm2, %ebx +; SSE2-NEXT: pextrw $0, %xmm3, %r14d +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: maxss %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm3, %xmm0 ; SSE2-NEXT: callq __truncsfbf2@PLT ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: shll $16, %r15d -; SSE2-NEXT: movd %r15d, %xmm3 -; SSE2-NEXT: shll $16, %r14d -; SSE2-NEXT: movd %r14d, %xmm2 -; SSE2-NEXT: testl %r15d, %r15d -; SSE2-NEXT: movdqa %xmm3, %xmm1 -; SSE2-NEXT: js .LBB34_6 +; SSE2-NEXT: shll $16, %r13d +; SSE2-NEXT: movd %r13d, %xmm1 +; SSE2-NEXT: shll $16, %r12d +; SSE2-NEXT: movd %r12d, %xmm2 +; SSE2-NEXT: js .LBB34_4 ; SSE2-NEXT: # %bb.5: +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: jmp .LBB34_6 +; SSE2-NEXT: .LBB34_4: +; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: movdqa %xmm2, %xmm1 ; SSE2-NEXT: .LBB34_6: -; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Reload -; SSE2-NEXT: psrlq $48, %xmm5 -; SSE2-NEXT: movdqa (%rsp), %xmm6 # 16-byte Reload -; SSE2-NEXT: psrlq $48, %xmm6 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: cmpordss %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: andps %xmm1, %xmm4 -; SSE2-NEXT: js .LBB34_8 -; SSE2-NEXT: # %bb.7: -; SSE2-NEXT: movdqa %xmm3, %xmm2 -; SSE2-NEXT: .LBB34_8: -; SSE2-NEXT: pextrw $0, %xmm5, %r15d -; SSE2-NEXT: pextrw $0, %xmm6, %r14d -; SSE2-NEXT: maxss %xmm2, %xmm1 -; SSE2-NEXT: andnps %xmm1, %xmm0 -; SSE2-NEXT: orps %xmm4, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: maxss %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm3, %xmm0 ; SSE2-NEXT: callq __truncsfbf2@PLT ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: shll $16, %ebx -; SSE2-NEXT: movd %ebx, %xmm1 +; SSE2-NEXT: shll $16, %r15d +; SSE2-NEXT: movd %r15d, %xmm1 ; SSE2-NEXT: shll $16, %ebp -; SSE2-NEXT: movd %ebp, %xmm3 -; SSE2-NEXT: testl %ebx, %ebx -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: js 
.LBB34_10 -; SSE2-NEXT: # %bb.9: -; SSE2-NEXT: movdqa %xmm3, %xmm2 -; SSE2-NEXT: .LBB34_10: +; SSE2-NEXT: movd %ebp, %xmm2 +; SSE2-NEXT: js .LBB34_7 +; SSE2-NEXT: # %bb.8: ; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: cmpordss %xmm2, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: andps %xmm2, %xmm4 -; SSE2-NEXT: js .LBB34_12 -; SSE2-NEXT: # %bb.11: -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: .LBB34_12: -; SSE2-NEXT: maxss %xmm3, %xmm2 -; SSE2-NEXT: andnps %xmm2, %xmm0 -; SSE2-NEXT: orps %xmm4, %xmm0 +; SSE2-NEXT: jmp .LBB34_9 +; SSE2-NEXT: .LBB34_7: +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: .LBB34_9: +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: maxss %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm3, %xmm0 ; SSE2-NEXT: callq __truncsfbf2@PLT ; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill ; SSE2-NEXT: shll $16, %r14d ; SSE2-NEXT: movd %r14d, %xmm1 -; SSE2-NEXT: shll $16, %r15d -; SSE2-NEXT: movd %r15d, %xmm3 -; SSE2-NEXT: testl %r14d, %r14d -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: js .LBB34_14 -; SSE2-NEXT: # %bb.13: -; SSE2-NEXT: movdqa %xmm3, %xmm2 -; SSE2-NEXT: .LBB34_14: +; SSE2-NEXT: shll $16, %ebx +; SSE2-NEXT: movd %ebx, %xmm2 +; SSE2-NEXT: js .LBB34_10 +; SSE2-NEXT: # %bb.11: ; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: cmpordss %xmm2, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: andps %xmm2, %xmm4 -; SSE2-NEXT: js .LBB34_16 -; SSE2-NEXT: # %bb.15: -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: .LBB34_16: -; SSE2-NEXT: maxss %xmm3, %xmm2 -; SSE2-NEXT: andnps %xmm2, %xmm0 -; SSE2-NEXT: orps %xmm4, %xmm0 +; SSE2-NEXT: jmp .LBB34_12 +; SSE2-NEXT: .LBB34_10: +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: .LBB34_12: +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: maxss %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm3, %xmm0 ; SSE2-NEXT: callq __truncsfbf2@PLT ; SSE2-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] @@ -2164,6 +2153,8 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: addq $56, %rsp ; SSE2-NEXT: popq %rbx +; SSE2-NEXT: popq %r12 +; SSE2-NEXT: popq %r13 ; SSE2-NEXT: popq %r14 ; SSE2-NEXT: popq %r15 ; SSE2-NEXT: popq %rbp @@ -2205,7 +2196,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX1-NEXT: vpextrw $0, %xmm2, %ebp ; AVX1-NEXT: vpextrw $0, %xmm3, %r15d ; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: callq __truncsfbf2@PLT ; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -2222,7 +2213,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX1-NEXT: vmovdqa %xmm2, %xmm0 ; AVX1-NEXT: .LBB34_6: ; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: callq 
__truncsfbf2@PLT ; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -2239,7 +2230,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX1-NEXT: vmovdqa %xmm2, %xmm0 ; AVX1-NEXT: .LBB34_9: ; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: callq __truncsfbf2@PLT ; AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill @@ -2256,7 +2247,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX1-NEXT: vmovdqa %xmm2, %xmm0 ; AVX1-NEXT: .LBB34_12: ; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: callq __truncsfbf2@PLT ; AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload @@ -2305,7 +2296,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX512-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: callq __truncsfbf2@PLT ; AVX512-NEXT: vpextrw $0, %xmm0, {{[0-9]+}}(%rsp) @@ -2319,7 +2310,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX512-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: callq __truncsfbf2@PLT ; AVX512-NEXT: vpextrw $0, %xmm0, (%rsp) @@ -2333,7 +2324,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX512-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: callq __truncsfbf2@PLT ; AVX512-NEXT: vpextrw $0, %xmm0, {{[0-9]+}}(%rsp) @@ -2347,7 +2338,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX512-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: callq __truncsfbf2@PLT ; AVX512-NEXT: vpextrw $0, %xmm0, {{[0-9]+}}(%rsp) @@ -2400,7 +2391,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; X86-NEXT: vpextrw $0, %xmm2, %edi ; X86-NEXT: vpextrw $0, %xmm3, %ebp ; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: shll $16, %ecx @@ -2416,7 +2407,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; X86-NEXT: vmovdqa %xmm2, %xmm0 ; X86-NEXT: .LBB34_6: ; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; 
X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-NEXT: calll __truncsfbf2 @@ -2436,7 +2427,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; X86-NEXT: vmovdqa %xmm2, %xmm0 ; X86-NEXT: .LBB34_9: ; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-NEXT: calll __truncsfbf2 @@ -2456,7 +2447,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; X86-NEXT: vmovdqa %xmm2, %xmm0 ; X86-NEXT: .LBB34_12: ; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-NEXT: calll __truncsfbf2 diff --git a/llvm/test/CodeGen/X86/fp-undef.ll b/llvm/test/CodeGen/X86/fp-undef.ll index 227f007..c358085 100644 --- a/llvm/test/CodeGen/X86/fp-undef.ll +++ b/llvm/test/CodeGen/X86/fp-undef.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ANY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -enable-unsafe-fp-math | FileCheck %s --check-prefix=ANY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ANY ; This is duplicated from tests for InstSimplify. If you're ; adding something here, you should probably add it there too. diff --git a/llvm/test/CodeGen/X86/fp128-select.ll b/llvm/test/CodeGen/X86/fp128-select.ll index 659e4dd..27a651e 100644 --- a/llvm/test/CodeGen/X86/fp128-select.ll +++ b/llvm/test/CodeGen/X86/fp128-select.ll @@ -13,8 +13,8 @@ define void @test_select(ptr %p, ptr %q, i1 zeroext %c) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: testl %edx, %edx ; SSE-NEXT: jne .LBB0_1 -; SSE-NEXT: # %bb.3: -; SSE-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: # %bb.2: +; SSE-NEXT: movaps {{.*#+}} xmm0 = [NaN] ; SSE-NEXT: movaps %xmm0, (%rsi) ; SSE-NEXT: retq ; SSE-NEXT: .LBB0_1: @@ -58,7 +58,7 @@ define fp128 @test_select_cc(fp128, fp128) nounwind { ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: jmp .LBB1_3 ; SSE-NEXT: .LBB1_1: -; SSE-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0] ; SSE-NEXT: .LBB1_3: # %BB0 ; SSE-NEXT: testl %ebx, %ebx ; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload diff --git a/llvm/test/CodeGen/X86/fsxor-alignment.ll b/llvm/test/CodeGen/X86/fsxor-alignment.ll index 6fa4a31..32af5b9 100644 --- a/llvm/test/CodeGen/X86/fsxor-alignment.ll +++ b/llvm/test/CodeGen/X86/fsxor-alignment.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s ; Don't fold the incoming stack arguments into the xorps instructions used ; to do floating-point negations, because the arguments aren't vectors diff --git a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll index f710a30..bd997d1 100644 --- a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll +++ b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; 
RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse < %s | FileCheck %s ; The debug info in this test case was causing a crash because machine trace metrics ; did not correctly ignore debug instructions. The check lines ensure that the diff --git a/llvm/test/CodeGen/X86/neg_fp.ll b/llvm/test/CodeGen/X86/neg_fp.ll index 8020982..18ded50 100644 --- a/llvm/test/CodeGen/X86/neg_fp.ll +++ b/llvm/test/CodeGen/X86/neg_fp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-- -mattr=+sse4.1 | FileCheck %s -; Test that when we don't -enable-unsafe-fp-math, we don't do the optimization +; Test that when we don't, we don't do the optimization ; -0 - (A - B) to (B - A) because A==B, -0 != 0 define float @negfp(float %a, float %b) nounwind { diff --git a/llvm/test/CodeGen/X86/negate-add-zero.ll b/llvm/test/CodeGen/X86/negate-add-zero.ll index eb4e2d3..4884832 100644 --- a/llvm/test/CodeGen/X86/negate-add-zero.ll +++ b/llvm/test/CodeGen/X86/negate-add-zero.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s | FileCheck %s ; PR3374 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/llvm/test/CodeGen/X86/recip-pic.ll b/llvm/test/CodeGen/X86/recip-pic.ll index d01ecc1..d2620e7 100644 --- a/llvm/test/CodeGen/X86/recip-pic.ll +++ b/llvm/test/CodeGen/X86/recip-pic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -enable-unsafe-fp-math -mcpu=slm -relocation-model=pic | FileCheck %s --check-prefix=CHECK +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=slm -relocation-model=pic | FileCheck %s --check-prefix=CHECK define fastcc float @foo(float %x) unnamed_addr #0 { ; CHECK-LABEL: foo: diff --git a/llvm/test/CodeGen/X86/sincos-opt.ll b/llvm/test/CodeGen/X86/sincos-opt.ll index 6885456..51f3e52 100644 --- a/llvm/test/CodeGen/X86/sincos-opt.ll +++ b/llvm/test/CodeGen/X86/sincos-opt.ll @@ -1,10 +1,10 @@ ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 -mcpu=core2 | FileCheck %s --check-prefix=OSX_SINCOS ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=OSX_NOOPT ; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=core2 | FileCheck %s --check-prefix=GNU_SINCOS -; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=core2 -enable-unsafe-fp-math | FileCheck %s --check-prefix=GNU_SINCOS_FASTMATH -; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 -mcpu=core2 -enable-unsafe-fp-math | FileCheck %s --check-prefix=GNU_SINCOS_FASTMATH +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=core2 | FileCheck %s --check-prefix=GNU_SINCOS_FASTMATH +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 -mcpu=core2 | FileCheck %s --check-prefix=GNU_SINCOS_FASTMATH ; RUN: llc < %s -mtriple=x86_64-fuchsia -mcpu=core2 | FileCheck %s --check-prefix=GNU_SINCOS -; RUN: llc < %s -mtriple=x86_64-fuchsia -mcpu=core2 -enable-unsafe-fp-math | FileCheck %s --check-prefix=GNU_SINCOS_FASTMATH +; RUN: llc < %s -mtriple=x86_64-fuchsia -mcpu=core2 | FileCheck %s --check-prefix=GNU_SINCOS_FASTMATH ; RUN: llc < %s -mtriple=x86_64-scei-ps4 -mcpu=btver2 | FileCheck %s --check-prefix=PS4_SINCOS ; RUN: llc < %s 
-mtriple=x86_64-sie-ps5 -mcpu=znver2 | FileCheck %s --check-prefix=PS4_SINCOS diff --git a/llvm/test/CodeGen/X86/sincos.ll b/llvm/test/CodeGen/X86/sincos.ll index 7903407..9206c25 100644 --- a/llvm/test/CodeGen/X86/sincos.ll +++ b/llvm/test/CodeGen/X86/sincos.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; Make sure this testcase codegens to the sin and cos instructions, not calls -; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 | FileCheck %s ; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 | FileCheck %s declare float @sinf(float) readonly diff --git a/llvm/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll b/llvm/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll index c0beb6f..2822d40 100644 --- a/llvm/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll +++ b/llvm/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math | FileCheck %s --check-prefix=CST --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+sse4.1 | FileCheck %s --check-prefix=CST --check-prefix=SSE41 -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx | FileCheck %s --check-prefix=CST --check-prefix=AVX -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL +; RUN: llc < %s -mtriple=x86_64 | FileCheck %s --check-prefix=CST --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CST --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64 -mattr=+avx | FileCheck %s --check-prefix=CST --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 +; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F +; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL ; Check that the constants used in the vectors are the right ones.
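; The SSE2 pattern below uses a FileCheck string variable: [[MASKCSTADDR:.LCPI[0-9_]+]]
; matches a constant-pool label and binds it to MASKCSTADDR, so later CHECK lines in the
; same test can refer back to the captured label. A minimal sketch of such a follow-up
; check (illustrative only, not taken from this patch):
;   ; SSE2: movdqa [[MASKCSTADDR]](%rip), %xmm1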
; SSE2: [[MASKCSTADDR:.LCPI[0-9_]+]]: diff --git a/llvm/test/Instrumentation/AllocToken/basic.ll b/llvm/test/Instrumentation/AllocToken/basic.ll index 099d37d..0c34b137 100644 --- a/llvm/test/Instrumentation/AllocToken/basic.ll +++ b/llvm/test/Instrumentation/AllocToken/basic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=inferattrs,alloc-token -alloc-token-mode=increment -S | FileCheck %s +; RUN: opt < %s -passes='inferattrs,alloc-token<mode=increment>' -S | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Instrumentation/AllocToken/basic32.ll b/llvm/test/Instrumentation/AllocToken/basic32.ll index 944a452..52d1d14 100644 --- a/llvm/test/Instrumentation/AllocToken/basic32.ll +++ b/llvm/test/Instrumentation/AllocToken/basic32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=inferattrs,alloc-token -alloc-token-mode=increment -S | FileCheck %s +; RUN: opt < %s -passes='inferattrs,alloc-token<mode=increment>' -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" diff --git a/llvm/test/Instrumentation/AllocToken/fast.ll b/llvm/test/Instrumentation/AllocToken/fast.ll index 19a3ef6..f6bf5ee 100644 --- a/llvm/test/Instrumentation/AllocToken/fast.ll +++ b/llvm/test/Instrumentation/AllocToken/fast.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=inferattrs,alloc-token -alloc-token-mode=increment -alloc-token-fast-abi -alloc-token-max=3 -S | FileCheck %s +; RUN: opt < %s -passes='inferattrs,alloc-token<mode=increment>' -alloc-token-fast-abi -alloc-token-max=3 -S | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Instrumentation/AllocToken/intrinsic.ll b/llvm/test/Instrumentation/AllocToken/intrinsic.ll new file mode 100644 index 0000000..5c6f2f1 --- /dev/null +++ b/llvm/test/Instrumentation/AllocToken/intrinsic.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; Test that the alloc-token pass lowers the intrinsic to a constant token ID. 
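; A minimal sketch of the lowering this test exercises, stated in terms of the metadata
; defined below: with -alloc-token-max=2 and mode=typehashpointersplit, the i1
; "contains pointer" operand of the metadata alone decides the bucket, so
;   %token_no_ptr = call i64 @llvm.alloc.token.id.i64(metadata !0)   ; !0 = !{!"NoPointerType", i1 false}
; folds to the constant i64 0, while the i1 true variant (!1) folds to i64 1,
; matching the CHECK lines that follow.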
+; +; RUN: opt < %s -passes='alloc-token<mode=typehashpointersplit>' -alloc-token-max=2 -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i64 @llvm.alloc.token.id.i64(metadata) + +define i64 @test_intrinsic_lowering() { +; CHECK-LABEL: define i64 @test_intrinsic_lowering() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i64 0 +; +entry: + %token_no_ptr = call i64 @llvm.alloc.token.id.i64(metadata !0) + ret i64 %token_no_ptr +} + +define i64 @test_intrinsic_lowering_ptr() { +; CHECK-LABEL: define i64 @test_intrinsic_lowering_ptr() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i64 1 +; +entry: + %token_with_ptr = call i64 @llvm.alloc.token.id.i64(metadata !1) + ret i64 %token_with_ptr +} + +!0 = !{!"NoPointerType", i1 false} +!1 = !{!"PointerType", i1 true} diff --git a/llvm/test/Instrumentation/AllocToken/intrinsic32.ll b/llvm/test/Instrumentation/AllocToken/intrinsic32.ll new file mode 100644 index 0000000..15f7c25 --- /dev/null +++ b/llvm/test/Instrumentation/AllocToken/intrinsic32.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; Test that the alloc-token pass lowers the intrinsic to a constant token ID. +; +; RUN: opt < %s -passes='alloc-token<mode=typehashpointersplit>' -alloc-token-max=2 -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386-pc-linux-gnu" + +declare i32 @llvm.alloc.token.id.i32(metadata) + +define i32 @test_intrinsic_lowering() { +; CHECK-LABEL: define i32 @test_intrinsic_lowering() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i32 0 +; +entry: + %token_no_ptr = call i32 @llvm.alloc.token.id.i32(metadata !0) + ret i32 %token_no_ptr +} + +define i32 @test_intrinsic_lowering_ptr() { +; CHECK-LABEL: define i32 @test_intrinsic_lowering_ptr() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i32 1 +; +entry: + %token_with_ptr = call i32 @llvm.alloc.token.id.i32(metadata !1) + ret i32 %token_with_ptr +} + +!0 = !{!"NoPointerType", i1 false} +!1 = !{!"PointerType", i1 true} diff --git a/llvm/test/Instrumentation/AllocToken/invoke.ll b/llvm/test/Instrumentation/AllocToken/invoke.ll index 347c99a..8e7ab38 100644 --- a/llvm/test/Instrumentation/AllocToken/invoke.ll +++ b/llvm/test/Instrumentation/AllocToken/invoke.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=inferattrs,alloc-token -alloc-token-mode=increment -S | FileCheck %s +; RUN: opt < %s -passes='inferattrs,alloc-token<mode=increment>' -S | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll b/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll index 19673da..45f573e 100644 --- a/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll +++ b/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=inferattrs,alloc-token -alloc-token-mode=increment -alloc-token-extended -S | FileCheck %s +; RUN: opt < %s -passes='inferattrs,alloc-token<mode=increment>' -alloc-token-extended -S | FileCheck %s target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Instrumentation/AllocToken/typehashpointersplit.ll b/llvm/test/Instrumentation/AllocToken/typehashpointersplit.ll index 1f77648..4d1be5e 100644 --- a/llvm/test/Instrumentation/AllocToken/typehashpointersplit.ll +++ b/llvm/test/Instrumentation/AllocToken/typehashpointersplit.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=inferattrs,alloc-token -alloc-token-mode=typehashpointersplit -alloc-token-max=2 -S | FileCheck %s +; RUN: opt < %s -passes='inferattrs,alloc-token<mode=typehashpointersplit>' -alloc-token-max=2 -S | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll index 1ddcd4b..1c869bd 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s +; RUN: opt -S -passes=msan -mattr=+sme -o - %s ; XFAIL: * diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll index 9caa89d..00cf3204 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s +; RUN: opt -S -passes=msan -mattr=+sme -o - %s ; XFAIL: * diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll new file mode 100644 index 0000000..3f43efa --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s + +; XFAIL: * + +; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll +; Manually reduced to show MSan leads to a compiler crash + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android9001" + +define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory { + %1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %2 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %1, ptr %ptr) + ret void +} diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll new file mode 100644 index 0000000..cd04373 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll @@ -0,0 +1,340 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s 
+ +; XFAIL: * + +; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android9001" + +define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zm) sanitize_memory { + call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice.7, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zm) + ret void +} + +define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zm) sanitize_memory { + call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice.7, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zm) + ret void +} + + +define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, + <vscale x 4 x i32> %zm) sanitize_memory { + call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, + <vscale x 4 x i32> %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice.7, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, + <vscale x 4 x i32> %zm) + ret void +} + +define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, + <vscale x 2 x i64> %zm) sanitize_memory { + call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 %slice, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, + <vscale x 2 x i64> %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 %slice.7, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, + <vscale x 2 x i64> %zm) + ret void +} + + +define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) sanitize_memory { + call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice.7, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) + ret void +} + + +define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) sanitize_memory { + call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) + %slice.7 = add i32 
%slice, 7 + call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice.7, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) + ret void +} + + + +define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, + <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1, + <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3) sanitize_memory { + call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 %slice, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, + <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1, + <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 %slice.7, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, + <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1, + <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3) + ret void +} + +define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, + <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1, + <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3) sanitize_memory { + call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 %slice, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, + <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1, + <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 %slice.7, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, + <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1, + <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3) + ret void +} + +define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) sanitize_memory { + call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice,<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) + ret void +} + +define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) sanitize_memory { + call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) + ret void +} + +define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) sanitize_memory { + call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice, + <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice.7, + <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) + ret void +} + +define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) sanitize_memory { + call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 %slice, + <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 
%slice.7, + <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) + ret void +} + +define void @multi_vector_add_za_vg1x2_f64_tuple(i64 %stride, ptr %ptr) sanitize_memory { +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %1, 0 + %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %4, 0 + %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %4, 1 + call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> %2, <vscale x 2 x double> %5) + call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> %3, <vscale x 2 x double> %6) + ret void +} + + +define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) sanitize_memory { + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice.7, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) + ret void +} + +define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) sanitize_memory { + call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice.7, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) + ret void +} + +define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) sanitize_memory { + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice, + <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, + <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice.7, + <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, + <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) + ret void +} + +define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory { +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 0 + %3 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 1 + %4 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x 
float> } %1, 2 + %5 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 0 + %8 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 1 + %9 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 2 + %10 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 0 + %13 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 1 + %14 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 2 + %15 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 0 + %18 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 1 + %19 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 2 + %20 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 3 + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %2, <vscale x 4 x float> %7, <vscale x 4 x float> %12, <vscale x 4 x float> %17) + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %3, <vscale x 4 x float> %8, <vscale x 4 x float> %13, <vscale x 4 x float> %18) + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %4, <vscale x 4 x float> %9, <vscale x 4 x float> %14, <vscale x 4 x float> %19) + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %5, <vscale x 4 x float> %10, <vscale x 4 x float> %15, <vscale x 4 x float> %20) + ret void +} + +define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) sanitize_memory { + call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice, + <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, + <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice.7, + <vscale x 2 x double> %zn0, <vscale x 2 x double> 
%zn1, + <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) + ret void +} + + +define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) sanitize_memory { + %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } + @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, + <vscale x 16 x i8> %zm) + ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res +} + +define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) sanitize_memory { + %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } + @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, + <vscale x 8 x i16> %zm) + ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res +} + +define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) sanitize_memory { + %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } + @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, + <vscale x 4 x i32> %zm) + ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res +} + +define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) sanitize_memory { + %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } + @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, + <vscale x 2 x i64> %zm) + ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res +} + + +define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8>%zm) sanitize_memory { + %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } + @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, + <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, + <vscale x 16 x i8> %zm) + ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res +} + +define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_x4_single_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) sanitize_memory { + %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } + @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, + <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, + <vscale x 8 x i16> %zm) + ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res +} + +define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_x4_single_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) sanitize_memory { + %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } + 
@llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, + <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, + <vscale x 4 x i32> %zm) + ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res +} + +define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_x4_single_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) sanitize_memory { + %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } + @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, + <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, + <vscale x 2 x i64> %zm) + ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res +} +declare void@llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare void@llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare void@llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare void@llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare void@llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare void@llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare void@llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare void@llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare void@llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32, <vscale x 4 x i32>,<vscale x 4 x i32>) +declare void@llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32, <vscale x 2 x i64>,<vscale x 2 x i64>) +declare void@llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32, <vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>) +declare void@llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32, <vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>) +declare void@llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>) +declare void@llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>) +declare void@llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>) +declare void@llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>) +declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare { 
<vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll index 434ac84..3d759f7 100644 --- a/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll +++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll @@ -865,33 +865,6 @@ entry: ret float %r } -; Note that the `unsafe-fp-math` from the function attributes should be moved to -; individual instructions, with the shadow instructions NOT getting the attribute. -define float @param_add_return_float_unsafe_fp_math(float %a) #0 { -; CHECK-LABEL: @param_add_return_float_unsafe_fp_math( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @param_add_return_float_unsafe_fp_math to i64) -; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1 -; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] -; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 -; CHECK-NEXT: [[B:%.*]] = fadd fast float [[A]], 1.000000e+00 -; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP4]], 1.000000e+00 -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_float_d(float [[B]], double [[TMP5]], i32 1, i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = fpext float [[B]] to double -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], double [[TMP8]], double [[TMP5]] -; CHECK-NEXT: store i64 ptrtoint (ptr @param_add_return_float_unsafe_fp_math to i64), ptr @__nsan_shadow_ret_tag, align 8 -; CHECK-NEXT: store double [[TMP9]], ptr @__nsan_shadow_ret_ptr, align 8 -; CHECK-NEXT: ret float [[B]] -; -entry: - %b = fadd float %a, 1.0 - ret float %b -} - - define void @truncate(<2 x double> %0) sanitize_numerical_stability { ; DQQ-LABEL: @truncate( ; DQQ-NEXT: entry: @@ -941,4 +914,4 @@ entry: } -attributes #0 = { nounwind readonly uwtable sanitize_numerical_stability "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" 
"no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #0 = { nounwind readonly uwtable sanitize_numerical_stability "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" } diff --git a/llvm/test/LTO/AArch64/Inputs/foo.ll b/llvm/test/LTO/AArch64/Inputs/foo.ll deleted file mode 100644 index 961b0d4..0000000 --- a/llvm/test/LTO/AArch64/Inputs/foo.ll +++ /dev/null @@ -1,16 +0,0 @@ -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64-unknown-linux-gnu" - -define dso_local i32 @foo() #0 { -entry: - ret i32 42 -} - -attributes #0 = { noinline nounwind optnone uwtable } - -!llvm.module.flags = !{!0, !1, !2, !3} - -!0 = !{i32 8, !"branch-target-enforcement", i32 1} -!1 = !{i32 8, !"sign-return-address", i32 1} -!2 = !{i32 8, !"sign-return-address-all", i32 1} -!3 = !{i32 8, !"sign-return-address-with-bkey", i32 1} diff --git a/llvm/test/LTO/AArch64/TestInputs/bar.ll b/llvm/test/LTO/AArch64/TestInputs/bar.ll new file mode 100644 index 0000000..7c2a753 --- /dev/null +++ b/llvm/test/LTO/AArch64/TestInputs/bar.ll @@ -0,0 +1,35 @@ +;; This file contains the new semantic of the branch-target-enforcement, sign-return-address. +;; Used for test mixing a mixed link case and also verify the import too in llc. + +; RUN: llc -mattr=+pauth -mattr=+bti %s -o - | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define dso_local void @bar() #0 { +entry: + ret void +} +; CHECK-LABEL: bar: +; CHECK-NOT: hint +; CHECK-NOT: bti +; CHECK: ret + +define dso_local void @baz() #1 { +entry: + ret void +} + +; CHECK-LABEL: baz: +; CHECK: bti c +; CHECK: ret + +attributes #0 = { noinline nounwind optnone uwtable } +attributes #1 = { noinline nounwind optnone uwtable "branch-target-enforcement" } + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = !{i32 8, !"branch-target-enforcement", i32 2} +!1 = !{i32 8, !"sign-return-address", i32 2} +!2 = !{i32 8, !"sign-return-address-all", i32 2} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 2} diff --git a/llvm/test/LTO/AArch64/TestInputs/fiz.ll b/llvm/test/LTO/AArch64/TestInputs/fiz.ll new file mode 100644 index 0000000..e578426 --- /dev/null +++ b/llvm/test/LTO/AArch64/TestInputs/fiz.ll @@ -0,0 +1,41 @@ +;; This file contains the previous semantic of the branch-target-enforcement, sign-return-address. +;; Used for test mixing a mixed link case and also verify the import too in llc. 
+ +; RUN: llc -mattr=+pauth -mattr=+bti %s -o - | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +declare void @func() + +define i32 @fiz_on() #0 { +entry: + call void @func() + ret i32 42 +} + +; CHECK-LABEL: fiz_on: +; CHECK: paciasp +; CHECK: bl func +; CHECK: retaa + +define i32 @fiz_off() #1 { +entry: + ret i32 43 +} + +; CHECK-LABEL: fiz_off: +; CHECK-NOT: pac +; CHECK-NOT: hint +; CHECK-NOT: bti +; CHECK: ret + +attributes #0 = { noinline nounwind optnone uwtable } +attributes #1 = { noinline nounwind optnone uwtable "branch-target-enforcement"="false" "sign-return-address"="none" } + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = !{i32 8, !"branch-target-enforcement", i32 1} +!1 = !{i32 8, !"sign-return-address", i32 1} +!2 = !{i32 8, !"sign-return-address-all", i32 0} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 0} diff --git a/llvm/test/LTO/AArch64/TestInputs/foo.ll b/llvm/test/LTO/AArch64/TestInputs/foo.ll new file mode 100644 index 0000000..689d938 --- /dev/null +++ b/llvm/test/LTO/AArch64/TestInputs/foo.ll @@ -0,0 +1,38 @@ +;; This file contains the previous semantics of the branch-target-enforcement and sign-return-address module flags. +;; Used to test a mixed link case and also to verify the lowering in llc. + +; RUN: llc -mattr=+pauth -mattr=+bti %s -o - | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define i32 @foo_on() #0 { +entry: + ret i32 42 +} + +; CHECK-LABEL: foo_on: +; CHECK: pacibsp +; CHECK: mov +; CHECK: retab + +define i32 @foo_off() #1 { +entry: + ret i32 43 +} + +; CHECK-LABEL: foo_off: +; CHECK-NOT: pac +; CHECK-NOT: hint +; CHECK-NOT: bti +; CHECK: ret + +attributes #0 = { noinline nounwind optnone uwtable } +attributes #1 = { noinline nounwind optnone uwtable "branch-target-enforcement"="false" "sign-return-address"="none" } + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = !{i32 8, !"branch-target-enforcement", i32 1} +!1 = !{i32 8, !"sign-return-address", i32 1} +!2 = !{i32 8, !"sign-return-address-all", i32 1} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 1} diff --git a/llvm/test/LTO/AArch64/TestInputs/old.ll b/llvm/test/LTO/AArch64/TestInputs/old.ll new file mode 100644 index 0000000..2b1758b --- /dev/null +++ b/llvm/test/LTO/AArch64/TestInputs/old.ll @@ -0,0 +1,59 @@ +;; This file contains the previous semantics of the branch-target-enforcement and sign-return-address function attributes. +;; Used to test a mixed link case and also to verify the lowering in llc.
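;; For contrast, the two styles these inputs exercise, copied from the files in this
;; patch: old.ll expresses everything as per-function string attributes and carries
;; no module flags, e.g.
;;   attributes #1 = { noinline nounwind optnone "branch-target-enforcement"="false" "sign-return-address"="all" "sign-return-address-key"="a_key" }
;; while bar.ll relies on module flags plus a valueless function attribute:
;;   attributes #1 = { noinline nounwind optnone uwtable "branch-target-enforcement" }
;;   !0 = !{i32 8, !"branch-target-enforcement", i32 2}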
+ +; RUN: llc -mattr=+pauth -mattr=+bti %s -o - | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define i32 @old_bti() #0 { +entry: + ret i32 2 +} + +; CHECK-LABEL: old_bti: +; CHECK: bti c +; CHECK: mov +; CHECK: ret + +define i32 @old_pac() #1 { +entry: + ret i32 2 +} + +; CHECK-LABEL: old_pac: +; CHECK: paciasp +; CHECK: mov +; CHECK: retaa + + +define i32 @old_none() #2 { +entry: + ret i32 3 +} + +; CHECK-LABEL: old_none: +; CHECK-NOT: hint +; CHECK-NOT: paci +; CHECK-NOT: bti +; CHECK: ret + +declare i32 @func(i32) + +define i32 @old_none_leaf() #3 { +entry: + %0 = call i32 @func() + ret i32 %0 +} + +; CHECK-LABEL: old_none_leaf: +; CHECK: paciasp +; CHECK: bl func +; CHECK: retaa + +attributes #0 = { noinline nounwind optnone "branch-target-enforcement"="true" } +attributes #1 = { noinline nounwind optnone "branch-target-enforcement"="false" "sign-return-address"="all" "sign-return-address-key"="a_key" } +attributes #2 = { noinline nounwind optnone "branch-target-enforcement"="false" "sign-return-address"="none" } +attributes #3 = { noinline nounwind optnone "branch-target-enforcement"="false" "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" } + +;; Intentionally no module flags diff --git a/llvm/test/LTO/AArch64/link-branch-target-enforcement.ll b/llvm/test/LTO/AArch64/link-branch-target-enforcement.ll index b3c9828..aef8907 100644 --- a/llvm/test/LTO/AArch64/link-branch-target-enforcement.ll +++ b/llvm/test/LTO/AArch64/link-branch-target-enforcement.ll @@ -1,10 +1,10 @@ -; Testcase to check that module with different branch-target-enforcement can -; be mixed. -; +;; Testcase to check that modules with different branch-target-enforcement can +;; be mixed. +;; ; RUN: llvm-as %s -o %t1.bc -; RUN: llvm-as %p/Inputs/foo.ll -o %t2.bc +; RUN: llvm-as %p/TestInputs/foo.ll -o %t2.bc ; RUN: llvm-lto -exported-symbol main \ -; RUN: -exported-symbol foo \ +; RUN: -exported-symbol foo_on \ ; RUN: -filetype=obj \ ; RUN: %t1.bc %t2.bc \ ; RUN: -o %t1.exe 2>&1 | FileCheck --allow-empty %s @@ -14,11 +14,11 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" -declare i32 @foo(); +declare i32 @foo_on(); define i32 @main() "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" { entry: - %add = call i32 @foo() + %add = call i32 @foo_on() ret i32 %add } @@ -30,9 +30,12 @@ entry: ; CHECK-NOT: linking module flags 'branch-target-enforcement': IDs have conflicting values in ; CHECK-DUMP: <main>: +; CHECK-DUMP: paciasp +; CHECK-DUMP: str ; CHECK-DUMP: bl 0x8 <main+0x8> -; CHECK-DUMP: <foo>: +; CHECK-DUMP: <foo_on>: +; CHECK-DUMP: pacibsp -; `main` doesn't support BTI while `foo` does, so in the binary -; we should see only PAC which is supported by both. +;; `main` doesn't support BTI while `foo_on` does, so in the binary +;; we should see only PAC, which is supported by both. ; CHECK-PROP: Properties: aarch64 feature: PAC
\ No newline at end of file diff --git a/llvm/test/LTO/AArch64/link-sign-return-address.ll b/llvm/test/LTO/AArch64/link-sign-return-address.ll new file mode 100644 index 0000000..df6276f --- /dev/null +++ b/llvm/test/LTO/AArch64/link-sign-return-address.ll @@ -0,0 +1,127 @@ +;; Testcase to check that modules with different sign return address can +;; be mixed. ; +; RUN: llvm-as %s -o %t1.bc +; RUN: llvm-as %p/TestInputs/foo.ll -o %t2.bc +; RUN: llvm-as %p/TestInputs/fiz.ll -o %t3.bc +; RUN: llvm-as %p/TestInputs/bar.ll -o %t4.bc +; RUN: llvm-as %p/TestInputs/old.ll -o %t5.bc +; RUN: llvm-lto -exported-symbol main \ +; RUN: -exported-symbol foo_on \ +; RUN: -exported-symbol foo_off \ +; RUN: -exported-symbol fiz_on \ +; RUN: -exported-symbol fiz_off \ +; RUN: -exported-symbol bar \ +; RUN: -exported-symbol baz \ +; RUN: -exported-symbol old_bti \ +; RUN: -exported-symbol old_pac \ +; RUN: -exported-symbol old_none \ +; RUN: -filetype=obj \ +; RUN: %t5.bc %t4.bc %t3.bc %t2.bc %t1.bc \ +; RUN: -o %t1.exe 2>&1 +; RUN: llvm-objdump -d %t1.exe | FileCheck --check-prefix=CHECK-DUMP %s +; RUN: llvm-readelf -n %t1.exe | FileCheck --allow-empty --check-prefix=CHECK-PROP %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +declare i32 @foo_on(); +declare i32 @foo_off(); +declare i32 @fiz_on(); +declare i32 @fiz_off(); +declare void @baz(); +declare void @bar(); +declare i32 @old_bti(); +declare i32 @old_pac(); +declare i32 @old_none(); + +define i32 @main() #0 { +entry: + call i32 @foo_on() + call i32 @foo_off() + call i32 @fiz_on() + call i32 @fiz_off() + call void @bar() + call void @baz() + call i32 @old_bti() + call i32 @old_pac() + call i32 @old_none() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone } + +!llvm.module.flags = !{!0, !1, !2, !3 } +!0 = !{i32 8, !"branch-target-enforcement", i32 0} +!1 = !{i32 8, !"sign-return-address", i32 0} +!2 = !{i32 8, !"sign-return-address-all", i32 0} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 0} + + +; CHECK-DUMP-LABEL: <old_bti>: +; CHECK-DUMP-NEXT: bti c +; CHECK-DUMP-NEXT: mov w0, #0x2 +; CHECK-DUMP-NEXT: ret + +; CHECK-DUMP-LABEL: <old_pac>: +; CHECK-DUMP-NEXT: paciasp +; CHECK-DUMP-NEXT: mov w0, #0x2 +; CHECK-DUMP-NEXT: autiasp +; CHECK-DUMP-NEXT: ret + +; CHECK-DUMP-LABEL: <old_none>: +; CHECK-DUMP-NEXT: mov w0, #0x3 +; CHECK-DUMP-NEXT: ret + +; CHECK-DUMP-LABEL: <bar>: +; CHECK-DUMP-NEXT: ret + +; CHECK-DUMP-LABEL: <baz>: +; CHECK-DUMP-NEXT: bti c +; CHECK-DUMP-NEXT: ret + +;; fiz.ll represents a module with the old style of function attributes. +;; fiz_on shall have PAC with A-key as requested at module level. ; CHECK-DUMP-LABEL: <fiz_on>: +; CHECK-DUMP-NEXT: paciasp +; CHECK-DUMP-NEXT: str x30, [sp, #-0x10]! +; CHECK-DUMP-NEXT: bl 0x38 <fiz_on+0x8> +; CHECK-DUMP-NEXT: mov w0, #0x2a +; CHECK-DUMP-NEXT: ldr x30, [sp], #0x10 +; CHECK-DUMP-NEXT: autiasp +; CHECK-DUMP-NEXT: ret + +;; fiz_off shall not have BTI or PAC instructions as they are disabled at function scope. +; CHECK-DUMP-LABEL: <fiz_off>: +; CHECK-DUMP-NEXT: mov w0, #0x2b +; CHECK-DUMP-NEXT: ret + +;; foo.ll represents a module with the old style of function attributes. +;; foo_on shall have PAC with B-key as requested at module level. +; CHECK-DUMP-LABEL: <foo_on>: +; CHECK-DUMP-NEXT: pacibsp +; CHECK-DUMP-NEXT: mov w0, #0x2a +; CHECK-DUMP-NEXT: autibsp +; CHECK-DUMP-NEXT: ret + +;; foo_off shall not have BTI or PAC instructions as they are disabled at function scope.
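;; For reference when reading the CHECK-DUMP lines: paciasp/autiasp (and the fused
;; retaa) sign and authenticate the return address with the A-key, pacibsp/autibsp
;; (and retab) do the same with the B-key, and "bti c" is the landing pad that
;; branch-target enforcement requires at indirect call targets.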
+; CHECK-DUMP-LABEL: <foo_off>: +; CHECK-DUMP-NEXT: mov w0, #0x2b +; CHECK-DUMP-NEXT: ret + +; CHECK-DUMP-LABEL: <main>: +; CHECK-DUMP-NOT: paciasp +; CHECK-DUMP-NEXT: str x30, +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl + +;; `main` doesn't support PAC sign-return-address while `foo_on` does, so in the binary +;; we should not see the PAC feature property. +; CHECK-PROP-NOT: Properties: aarch64 feature: PAC diff --git a/llvm/test/Linker/link-arm-and-thumb.ll b/llvm/test/Linker/link-arm-and-thumb.ll index a90f212..b5984bf 100644 --- a/llvm/test/Linker/link-arm-and-thumb.ll +++ b/llvm/test/Linker/link-arm-and-thumb.ll @@ -13,11 +13,11 @@ entry: ret i32 %add } -; CHECK: define i32 @main() { +; CHECK: define i32 @main() ; CHECK: define i32 @foo(i32 %a, i32 %b) [[ARM_ATTRS:#[0-9]+]] ; CHECK: define i32 @bar(i32 %a, i32 %b) [[THUMB_ATTRS:#[0-9]+]] -; CHECK: attributes [[ARM_ATTRS]] = { "target-features"="-thumb-mode" } -; CHECK: attributes [[THUMB_ATTRS]] = { "target-features"="+thumb-mode" } +; CHECK: attributes [[ARM_ATTRS]] = {{{.*}}"target-features"="-thumb-mode" } +; CHECK: attributes [[THUMB_ATTRS]] = {{{.*}}"target-features"="+thumb-mode" } ; STDERR-NOT: warning: Linking two modules of different target triples: diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s index d3b44eb..8160544 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s @@ -218,64 +218,76 @@ v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp // GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04] v_add_min_i32 v2, s4, v7, v8 -// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] v_add_min_i32 v2, v4, 0, 1 -// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] v_add_min_i32 v2, v4, 3, s2 -// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] v_add_min_i32 v2, s4, 4, v2 -// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] v_add_min_i32 v2, v4, v7, 12345 -// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_min_i32 v0, v1, v2, v3 clamp +// GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04] v_add_max_i32 v2, s4, v7, v8 -// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] v_add_max_i32 v2, v4, 0, 1 -// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
v_add_max_i32 v2, v4, 3, s2 -// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] v_add_max_i32 v2, s4, 4, v2 -// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] v_add_max_i32 v2, v4, v7, 12345 -// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_max_i32 v0, v1, v2, v3 clamp +// GFX1250: v_add_max_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5e,0xd6,0x01,0x05,0x0e,0x04] v_add_min_u32 v2, s4, v7, v8 -// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] v_add_min_u32 v2, v4, 0, 1 -// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] v_add_min_u32 v2, v4, 3, s2 -// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] v_add_min_u32 v2, s4, 4, v2 -// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] v_add_min_u32 v2, v4, v7, 12345 -// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_min_u32 v0, v1, v2, v3 clamp +// GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04] v_add_max_u32 v2, s4, v7, v8 -// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] v_add_max_u32 v2, v4, 0, 1 -// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] v_add_max_u32 v2, v4, 3, s2 -// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] v_add_max_u32 v2, s4, 4, v2 -// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] v_add_max_u32 v2, v4, v7, 12345 -// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_max_u32 v0, v1, v2, v3 clamp +// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04] v_cvt_pk_bf16_f32 v5, v1, v2 // GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: 
[0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s index 98d07ac..d913bd2 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s @@ -218,64 +218,76 @@ v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp // GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04] v_add_min_i32 v2, s4, v7, v8 -// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] v_add_min_i32 v2, v4, 0, 1 -// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] v_add_min_i32 v2, v4, 3, s2 -// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] v_add_min_i32 v2, s4, 4, v2 -// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] v_add_min_i32 v2, v4, v7, 12345 -// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_min_i32 v0, v1, v2, v3 clamp +// GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04] v_add_max_i32 v2, s4, v7, v8 -// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] v_add_max_i32 v2, v4, 0, 1 -// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] v_add_max_i32 v2, v4, 3, s2 -// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] v_add_max_i32 v2, s4, 4, v2 -// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] v_add_max_i32 v2, v4, v7, 12345 -// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_max_i32 v0, v1, v2, v3 clamp +// GFX1250: v_add_max_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5e,0xd6,0x01,0x05,0x0e,0x04] v_add_min_u32 v2, s4, v7, v8 -// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] v_add_min_u32 v2, v4, 0, 1 -// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] v_add_min_u32 v2, v4, 3, s2 -// GFX1250: v_add_min_u32_e64 v2, v4,
3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] v_add_min_u32 v2, s4, 4, v2 -// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] v_add_min_u32 v2, v4, v7, 12345 -// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_min_u32 v0, v1, v2, v3 clamp +// GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04] v_add_max_u32 v2, s4, v7, v8 -// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] v_add_max_u32 v2, v4, 0, 1 -// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] v_add_max_u32 v2, v4, 3, s2 -// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] v_add_max_u32 v2, s4, 4, v2 -// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] v_add_max_u32 v2, v4, v7, 12345 -// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_max_u32 v0, v1, v2, v3 clamp +// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04] v_cvt_pk_bf16_f32 v5, v1, v2 // GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s index 78aa8f2..3faea99 100644 --- a/llvm/test/MC/AMDGPU/literals.s +++ b/llvm/test/MC/AMDGPU/literals.s @@ -20,282 +20,282 @@ //---------------------------------------------------------------------------// v_fract_f64 v[0:1], 0.5 -// SICI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x64,0x00,0x7e] -// GFX12XX: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e] // GFX11: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x64,0x00,0x7e] +// SICI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e] v_sqrt_f64 v[0:1], -4.0 -// SICI: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e] -// GFX89: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x50,0x00,0x7e] -// GFX12XX: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e] // GFX11: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e] +// GFX12XX: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e] +// GFX89: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x50,0x00,0x7e] +// SICI: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: 
[0xf7,0x68,0x00,0x7e] v_log_clamp_f32 v1, 0.5 // NOGFX8PLUS: :[[@LINE-1]]:1: error: instruction not supported on this GPU // SICI: v_log_clamp_f32_e32 v1, 0.5 ; encoding: [0xf0,0x4c,0x02,0x7e] v_trunc_f32 v0, 0.5 -// SICI: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x38,0x00,0x7e] -// GFX12XX: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e] // GFX11: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x38,0x00,0x7e] +// SICI: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e] v_fract_f64 v[0:1], -1.0 -// SICI: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x64,0x00,0x7e] -// GFX12XX: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e] // GFX11: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x64,0x00,0x7e] +// SICI: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e] v_trunc_f32 v0, -1.0 -// SICI: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x38,0x00,0x7e] -// GFX12XX: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e] // GFX11: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x38,0x00,0x7e] +// SICI: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e] v_fract_f64 v[0:1], 4.0 -// SICI: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x64,0x00,0x7e] -// GFX12XX: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e] // GFX11: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x64,0x00,0x7e] +// SICI: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e] v_trunc_f32 v0, 4.0 -// SICI: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x38,0x00,0x7e] -// GFX12XX: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e] // GFX11: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x38,0x00,0x7e] +// SICI: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e] v_fract_f64 v[0:1], 0.0 -// SICI: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x64,0x00,0x7e] -// GFX12XX: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e] // GFX11: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x64,0x00,0x7e] +// SICI: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e] v_trunc_f32 v0, 0.0 -// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e] -// GFX12XX: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] // GFX11: 
v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e] +// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] v_fract_f64 v[0:1], 1.5 -// SICI: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f] -// GFX89: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf8,0x3f] -// GFX12XX: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f] // GFX11: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f] +// GFX12XX: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f] +// GFX89: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf8,0x3f] +// SICI: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f] v_trunc_f32 v0, 1.5 -// SICI: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f] -// GFX89: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0xc0,0x3f] -// GFX12XX: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f] // GFX11: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f] +// GFX12XX: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f] +// GFX89: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0xc0,0x3f] +// SICI: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f] v_fract_f64 v[0:1], -3.1415 -// SICI: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0] -// GFX89: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x64,0x00,0x7e,0xca,0x21,0x09,0xc0] -// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero // GFX11: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0] // GFX12: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0] // GFX1250: v_fract_f64_e32 v[0:1], 0xc00921cac083126f ; encoding: [0xfe,0x7c,0x00,0x7e,0x6f,0x12,0x83,0xc0,0xca,0x21,0x09,0xc0] -// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// GFX89: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x64,0x00,0x7e,0xca,0x21,0x09,0xc0] +// NOGFX11: :[[@LINE-5]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX12: :[[@LINE-6]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX89: :[[@LINE-7]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOSICI: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. 
Low 32-bits will be set to zero +// SICI: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0] // NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero v_trunc_f32 v0, -3.1415 -// SICI: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0] -// GFX89: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x38,0x00,0x7e,0x56,0x0e,0x49,0xc0] -// GFX12XX: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0] // GFX11: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0] +// GFX12XX: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0] +// GFX89: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x38,0x00,0x7e,0x56,0x0e,0x49,0xc0] +// SICI: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0] v_fract_f64 v[0:1], 100000000000000000000000.0 -// SICI: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44] -// GFX89: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x64,0x00,0x7e,0x02,0x2d,0xb5,0x44] -// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero // GFX11: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44] // GFX12: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44] // GFX1250: v_fract_f64_e32 v[0:1], 0x44b52d02c7e14af6 ; encoding: [0xfe,0x7c,0x00,0x7e,0xf6,0x4a,0xe1,0xc7,0x02,0x2d,0xb5,0x44] -// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// GFX89: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x64,0x00,0x7e,0x02,0x2d,0xb5,0x44] +// NOGFX11: :[[@LINE-5]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX12: :[[@LINE-6]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX89: :[[@LINE-7]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOSICI: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// SICI: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44] // NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. 
Low 32-bits will be set to zero v_trunc_f32 v0, 100000000000000000000000.0 -// SICI: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65] -// GFX89: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x38,0x00,0x7e,0x16,0x68,0xa9,0x65] -// GFX12XX: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65] // GFX11: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65] +// GFX12XX: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65] +// GFX89: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x38,0x00,0x7e,0x16,0x68,0xa9,0x65] +// SICI: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65] v_fract_f64 v[0:1], 10000000.0 -// SICI: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41] -// GFX89: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x64,0x00,0x7e,0xd0,0x12,0x63,0x41] -// GFX12XX: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41] // GFX11: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41] +// GFX12XX: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41] +// GFX89: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x64,0x00,0x7e,0xd0,0x12,0x63,0x41] +// SICI: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41] v_trunc_f32 v0, 10000000.0 -// SICI: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b] -// GFX89: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x38,0x00,0x7e,0x80,0x96,0x18,0x4b] -// GFX12XX: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b] // GFX11: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b] +// GFX12XX: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b] +// GFX89: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x38,0x00,0x7e,0x80,0x96,0x18,0x4b] +// SICI: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b] v_fract_f64 v[0:1], 3.402823e+38 -// SICI: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47] -// GFX89: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xef,0x47] -// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero // GFX11: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47] // GFX12: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47] // GFX1250: v_fract_f64_e32 v[0:1], 0x47efffff966ad924 ; encoding: [0xfe,0x7c,0x00,0x7e,0x24,0xd9,0x6a,0x96,0xff,0xff,0xef,0x47] -// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// GFX89: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xef,0x47] +// NOGFX11: :[[@LINE-5]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. 
Low 32-bits will be set to zero +// NOGFX12: :[[@LINE-6]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX89: :[[@LINE-7]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOSICI: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// SICI: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47] // NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero v_trunc_f32 v0, 3.402823e+38 -// SICI: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f] -// GFX89: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x38,0x00,0x7e,0xfd,0xff,0x7f,0x7f] -// GFX12XX: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f] // GFX11: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f] +// GFX12XX: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f] +// GFX89: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x38,0x00,0x7e,0xfd,0xff,0x7f,0x7f] +// SICI: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f] v_fract_f64 v[0:1], 2.3509886e-38 -// SICI: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38] -// GFX89: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0x1f,0x38] -// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero // GFX11: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38] // GFX12: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38] // GFX1250: v_fract_f64_e32 v[0:1], 0x381fffffe8c9d9fb ; encoding: [0xfe,0x7c,0x00,0x7e,0xfb,0xd9,0xc9,0xe8,0xff,0xff,0x1f,0x38] -// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// GFX89: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0x1f,0x38] +// NOGFX11: :[[@LINE-5]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX12: :[[@LINE-6]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX89: :[[@LINE-7]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOSICI: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// SICI: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38] // NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. 
Low 32-bits will be set to zero v_trunc_f32 v0, 2.3509886e-38 -// SICI: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00] -// GFX89: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x38,0x00,0x7e,0xff,0xff,0xff,0x00] -// GFX12XX: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00] // GFX11: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00] +// GFX12XX: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00] +// GFX89: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x38,0x00,0x7e,0xff,0xff,0xff,0x00] +// SICI: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00] v_fract_f64 v[0:1], 2.3509886e-70 -// SICI: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31] -// GFX89: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x64,0x00,0x7e,0x23,0xf6,0x79,0x31] -// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero // GFX11: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31] // GFX12: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31] // GFX1250: v_fract_f64_e32 v[0:1], 0x3179f623c2d3cf3c ; encoding: [0xfe,0x7c,0x00,0x7e,0x3c,0xcf,0xd3,0xc2,0x23,0xf6,0x79,0x31] -// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// GFX89: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x64,0x00,0x7e,0x23,0xf6,0x79,0x31] +// NOGFX11: :[[@LINE-5]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX12: :[[@LINE-6]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX89: :[[@LINE-7]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOSICI: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// SICI: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31] // NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. 
Low 32-bits will be set to zero v_trunc_f32 v0, 2.3509886e-70 // NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction v_fract_f64_e32 v[0:1], 1.0 -// SICI: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x64,0x00,0x7e] -// GFX12XX: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x7c,0x00,0x7e] // GFX11: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x7c,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x64,0x00,0x7e] +// SICI: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x7c,0x00,0x7e] v_fract_f64_e32 v[0:1], lit(1.0) -// SICI: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f] -// GFX89: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf0,0x3f] // GFX11: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f] // GFX12: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f] // GFX1250: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xfe,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f,0x00,0x00,0x00,0x00] +// GFX89: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf0,0x3f] +// SICI: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f] v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1.0 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1.0 ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0xca,0x1b] -// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode -// NOGFX1250: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode +// NOGFX12: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], lit(1.0) -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU -// NOGFX11: :[[@LINE-3]]:54: error: invalid operand for instruction -// NOGFX12: :[[@LINE-4]]:54: error: invalid operand for instruction -// NOGFX1250: :[[@LINE-5]]:54: error: invalid operand for instruction +// NOGFX11: :[[@LINE-1]]:54: error: invalid operand for instruction +// NOGFX12: :[[@LINE-2]]:54: error: invalid operand for instruction +// NOGFX1250: :[[@LINE-3]]:54: error: invalid operand for instruction +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_cos_f16_e32 v5.l, 1.0 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode // GFX11: v_cos_f16_e32 v5.l, 1.0 ; encoding: [0xf2,0xc2,0x0a,0x7e] // GFX1250: v_cos_f16_e32 v5.l, 1.0 ; encoding: [0xf2,0xc2,0x0a,0x7e] -// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode +// NOGFX12: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode +// NOGFX89: :[[@LINE-4]]:1: error: operands are not valid for this 
GPU or mode +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_cos_f16_e32 v5.l, lit(1.0) -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode // GFX11: v_cos_f16_e32 v5.l, lit(0x3c00) ; encoding: [0xff,0xc2,0x0a,0x7e,0x00,0x3c,0x00,0x00] // GFX1250: v_cos_f16_e32 v5.l, lit(0x3c00) ; encoding: [0xff,0xc2,0x0a,0x7e,0x00,0x3c,0x00,0x00] -// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode +// NOGFX12: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode +// NOGFX89: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_tanh_bf16 v5, 1.0 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX1250: v_tanh_bf16_e32 v5, 1.0 ; encoding: [0xf2,0x94,0x0a,0x7e] -// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_tanh_bf16 v5, lit(1.0) -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX1250: v_tanh_bf16_e32 v5, lit(0x3f80) ; encoding: [0xff,0x94,0x0a,0x7e,0x80,0x3f,0x00,0x00] -// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_trunc_f32_e32 v0, 1.0 -// SICI: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x38,0x00,0x7e] -// GFX12XX: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x42,0x00,0x7e] // GFX11: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x42,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x38,0x00,0x7e] +// SICI: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x42,0x00,0x7e] v_trunc_f32_e32 v0, lit(1.0) -// SICI: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f] -// GFX89: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0x80,0x3f] -// GFX12XX: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f] // GFX11: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f] +// GFX12XX: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f] +// GFX89: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0x80,0x3f] +// SICI: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f] v_dot2_bf16_bf16 v5.l, v1, v2, 1.0 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: 
error: instruction not supported on this GPU // GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03] -// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode -// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_dot2_bf16_bf16 v5.l, v1, v2, lit(1.0) -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, lit(0x3f80) ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0x80,0x3f,0x00,0x00] -// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode -// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_dot2_f32_f16 v5, v1, 1.0, v2 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_dot2_f32_f16 v5, v1, 1.0, v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xe5,0x09,0x1c] // GFX12: v_dot2_f32_f16 v5, v1, 1.0, v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xe5,0x09,0x1c] -// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_dot2_f32_f16 v5, v1, lit(1.0), v2 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_dot2_f32_f16 v5, v1, lit(0x3c00), v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x00,0x3c,0x00,0x00] // GFX12: v_dot2_f32_f16 v5, v1, lit(0x3c00), v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x00,0x3c,0x00,0x00] -// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_cvt_pk_fp8_f16 v1.l, 1.0 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3c00 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00] -// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_cvt_pk_fp8_f16 v1.l, lit(1.0) -// NOSICI: 
:[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX1250: v_cvt_pk_fp8_f16 v1.l, lit(0x3c00) ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00] -// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU //---------------------------------------------------------------------------// // fp literal, expected int operand @@ -309,118 +309,118 @@ s_mov_b64 s[0:1], lit(0.5) // NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction v_and_b32_e32 v0, 0.5, v1 -// SICI: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x26] -// GFX12XX: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36] // GFX11: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36] +// GFX12XX: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x26] +// SICI: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36] v_and_b32_e64 v0, 0.5, v1 -// SICI: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf0,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf0,0x02,0x02,0x00] -// GFX12XX: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf0,0x02,0x02,0x00] // GFX11: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf0,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf0,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf0,0x02,0x02,0x00] +// SICI: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf0,0x02,0x02,0x00] s_mov_b64_e32 s[0:1], -1.0 // GFX8PLUS: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x01,0x80,0xbe] // SICI: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x04,0x80,0xbe] v_and_b32_e32 v0, -1.0, v1 -// SICI: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x26] -// GFX12XX: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36] // GFX11: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36] +// GFX12XX: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x26] +// SICI: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36] v_and_b32_e64 v0, -1.0, v1 -// SICI: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf3,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf3,0x02,0x02,0x00] -// GFX12XX: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf3,0x02,0x02,0x00] // GFX11: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf3,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf3,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf3,0x02,0x02,0x00] +// SICI: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf3,0x02,0x02,0x00] s_mov_b64_e32 s[0:1], 4.0 // GFX8PLUS: s_mov_b64 s[0:1], 4.0 ; 
encoding: [0xf6,0x01,0x80,0xbe] // SICI: s_mov_b64 s[0:1], 4.0 ; encoding: [0xf6,0x04,0x80,0xbe] v_and_b32_e32 v0, 4.0, v1 -// SICI: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x26] -// GFX12XX: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36] // GFX11: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36] +// GFX12XX: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x26] +// SICI: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36] v_and_b32_e64 v0, 4.0, v1 -// SICI: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf6,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf6,0x02,0x02,0x00] -// GFX12XX: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf6,0x02,0x02,0x00] // GFX11: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf6,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf6,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf6,0x02,0x02,0x00] +// SICI: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf6,0x02,0x02,0x00] s_mov_b64_e32 s[0:1], 0.0 // GFX8PLUS: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe] // SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe] v_and_b32_e32 v0, 0.0, v1 -// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26] -// GFX12XX: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] // GFX11: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] +// GFX12XX: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26] +// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] v_and_b32_e64 v0, 0.0, v1 -// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00] -// GFX12XX: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00] // GFX11: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00] +// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00] s_mov_b64_e32 s[0:1], 1.5 // NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction v_and_b32_e32 v0, 1.5, v1 -// SICI: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f] -// GFX89: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0xc0,0x3f] -// GFX12XX: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f] // GFX11: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f] +// GFX12XX: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f] +// GFX89: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0xc0,0x3f] +// SICI: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f] s_mov_b64_e32 s[0:1], -3.1415 // NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction v_and_b32_e32 v0, -3.1415, v1 -// SICI: 
v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0] -// GFX89: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x26,0x56,0x0e,0x49,0xc0] -// GFX12XX: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0] // GFX11: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0] +// GFX12XX: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0] +// GFX89: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x26,0x56,0x0e,0x49,0xc0] +// SICI: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0] s_mov_b64_e32 s[0:1], 100000000000000000000000.0 // NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction v_and_b32_e32 v0, 100000000000000000000000.0, v1 -// SICI: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65] -// GFX89: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x26,0x16,0x68,0xa9,0x65] -// GFX12XX: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65] // GFX11: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65] +// GFX12XX: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65] +// GFX89: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x26,0x16,0x68,0xa9,0x65] +// SICI: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65] s_mov_b64_e32 s[0:1], 10000000.0 // NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction v_and_b32_e32 v0, 10000000.0, v1 -// SICI: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b] -// GFX89: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x26,0x80,0x96,0x18,0x4b] -// GFX12XX: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b] // GFX11: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b] +// GFX12XX: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b] +// GFX89: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x26,0x80,0x96,0x18,0x4b] +// SICI: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b] s_mov_b64_e32 s[0:1], 3.402823e+38 // NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction v_and_b32_e32 v0, 3.402823e+38, v1 -// SICI: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f] -// GFX89: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x26,0xfd,0xff,0x7f,0x7f] -// GFX12XX: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f] // GFX11: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f] +// GFX12XX: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f] +// GFX89: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x26,0xfd,0xff,0x7f,0x7f] +// SICI: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f] s_mov_b64_e32 s[0:1], 2.3509886e-38 // NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction v_and_b32_e32 v0, 2.3509886e-38, v1 -// SICI: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00] -// GFX89: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x26,0xff,0xff,0xff,0x00] -// GFX12XX: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: 
[0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00] // GFX11: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00] +// GFX12XX: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00] +// GFX89: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x26,0xff,0xff,0xff,0x00] +// SICI: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00] s_mov_b64_e32 s[0:1], 2.3509886e-70 // NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction @@ -429,322 +429,322 @@ v_and_b32_e32 v0, 2.3509886e-70, v1 // NOGCN: :[[@LINE-1]]:19: error: invalid operand for instruction v_not_b16 v5.l, 1.0 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_not_b16_e32 v5.l, 1.0 ; encoding: [0xf2,0xd2,0x0a,0x7e] // GFX1250: v_not_b16_e32 v5.l, 1.0 ; encoding: [0xf2,0xd2,0x0a,0x7e] -// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode +// NOGFX12: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_not_b16 v5.l, lit(1.0) -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_not_b16_e32 v5.l, lit(0x3f800000) ; encoding: [0xff,0xd2,0x0a,0x7e,0x00,0x00,0x80,0x3f] // GFX1250: v_not_b16_e32 v5.l, lit(0x3f800000) ; encoding: [0xff,0xd2,0x0a,0x7e,0x00,0x00,0x80,0x3f] -// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode +// NOGFX12: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_and_b32_e32 v0, 1.0, v1 -// SICI: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x26] -// GFX12XX: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x36] // GFX11: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x36] +// GFX12XX: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x26] +// SICI: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x36] v_and_b32_e32 v0, lit(1.0), v1 -// SICI: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f] -// GFX89: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0x80,0x3f] -// GFX12XX: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f] // GFX11: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f] +// GFX12XX: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f] +// GFX89: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0x80,0x3f] +// SICI: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f] v_pk_add_u16 v5, exec_lo, 1.0 +// GFX11: v_pk_add_u16 v5, exec_lo, 1.0 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xe4,0x01,0x18] // GFX12XX: v_pk_add_u16 v5, exec_lo, 1.0 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xe4,0x01,0x18] -// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX9: v_pk_add_u16 v5, exec_lo, 
1.0 ; encoding: [0x05,0x40,0x8a,0xd3,0x7e,0xe4,0x01,0x18] -// GFX11: v_pk_add_u16 v5, exec_lo, 1.0 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xe4,0x01,0x18] +// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU // NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_pk_add_u16 v5, exec_lo, lit(1.0) -// GFX12XX: v_pk_add_u16 v5, exec_lo, lit(0x3f800000) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x00,0x00,0x80,0x3f] -// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_pk_add_u16 v5, exec_lo, lit(0x3f800000) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x00,0x00,0x80,0x3f] -// NOVI: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX9: :[[@LINE-5]]:31: error: invalid operand (violates constant bus restrictions) +// GFX12XX: v_pk_add_u16 v5, exec_lo, lit(0x3f800000) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x00,0x00,0x80,0x3f] +// NOGFX9: :[[@LINE-3]]:31: error: invalid operand (violates constant bus restrictions) +// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1.0 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1.0 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xca,0x03] -// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(1.0) -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(0x3f800000) ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xfe,0x03,0x00,0x00,0x80,0x3f] -// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU //---------------------------------------------------------------------------// // int literal, expected fp operand //---------------------------------------------------------------------------// v_trunc_f32_e32 v0, 0 -// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e] -// GFX12XX: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] // GFX11: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e] +// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] v_fract_f64_e32 v[0:1], 1 -// SICI: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x64,0x00,0x7e] 
-// GFX12XX: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x7c,0x00,0x7e] // GFX11: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x7c,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x64,0x00,0x7e] +// SICI: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x7c,0x00,0x7e] v_fract_f64_e32 v[0:1], lit(1) -// SICI: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00] -// GFX89: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x64,0x00,0x7e,0x01,0x00,0x00,0x00] // GFX11: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00] // GFX12: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00] // GFX1250: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xfe,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +// GFX89: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x64,0x00,0x7e,0x01,0x00,0x00,0x00] +// SICI: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00] v_trunc_f32_e64 v0, 0 -// SICI: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x42,0xd3,0x80,0x00,0x00,0x00] -// GFX89: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x5c,0xd1,0x80,0x00,0x00,0x00] -// GFX12XX: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0xa1,0xd5,0x80,0x00,0x00,0x00] // GFX11: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0xa1,0xd5,0x80,0x00,0x00,0x00] +// GFX12XX: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0xa1,0xd5,0x80,0x00,0x00,0x00] +// GFX89: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x5c,0xd1,0x80,0x00,0x00,0x00] +// SICI: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x42,0xd3,0x80,0x00,0x00,0x00] v_fract_f64_e64 v[0:1], 0 -// SICI: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x7c,0xd3,0x80,0x00,0x00,0x00] -// GFX89: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x72,0xd1,0x80,0x00,0x00,0x00] -// GFX12XX: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0xbe,0xd5,0x80,0x00,0x00,0x00] // GFX11: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0xbe,0xd5,0x80,0x00,0x00,0x00] +// GFX12XX: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0xbe,0xd5,0x80,0x00,0x00,0x00] +// GFX89: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x72,0xd1,0x80,0x00,0x00,0x00] +// SICI: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x7c,0xd3,0x80,0x00,0x00,0x00] v_trunc_f32_e32 v0, -13 -// SICI: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x38,0x00,0x7e] -// GFX12XX: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e] // GFX11: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x38,0x00,0x7e] +// SICI: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e] v_fract_f64_e32 v[0:1], -13 -// SICI: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x64,0x00,0x7e] -// GFX12XX: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e] // GFX11: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x64,0x00,0x7e] +// SICI: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e] v_trunc_f32_e64 v0, -13 -// SICI: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x42,0xd3,0xcd,0x00,0x00,0x00] -// 
GFX89: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x5c,0xd1,0xcd,0x00,0x00,0x00] -// GFX12XX: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0xa1,0xd5,0xcd,0x00,0x00,0x00] // GFX11: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0xa1,0xd5,0xcd,0x00,0x00,0x00] +// GFX12XX: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0xa1,0xd5,0xcd,0x00,0x00,0x00] +// GFX89: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x5c,0xd1,0xcd,0x00,0x00,0x00] +// SICI: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x42,0xd3,0xcd,0x00,0x00,0x00] v_fract_f64_e64 v[0:1], -13 -// SICI: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x7c,0xd3,0xcd,0x00,0x00,0x00] -// GFX89: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x72,0xd1,0xcd,0x00,0x00,0x00] -// GFX12XX: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0xbe,0xd5,0xcd,0x00,0x00,0x00] // GFX11: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0xbe,0xd5,0xcd,0x00,0x00,0x00] +// GFX12XX: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0xbe,0xd5,0xcd,0x00,0x00,0x00] +// GFX89: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x72,0xd1,0xcd,0x00,0x00,0x00] +// SICI: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x7c,0xd3,0xcd,0x00,0x00,0x00] v_trunc_f32_e32 v0, 35 -// SICI: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x38,0x00,0x7e] -// GFX12XX: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e] // GFX11: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x38,0x00,0x7e] +// SICI: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e] v_fract_f64_e32 v[0:1], 35 -// SICI: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x64,0x00,0x7e] -// GFX12XX: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e] // GFX11: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x64,0x00,0x7e] +// SICI: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e] v_trunc_f32_e64 v0, 35 -// SICI: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x42,0xd3,0xa3,0x00,0x00,0x00] -// GFX89: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x5c,0xd1,0xa3,0x00,0x00,0x00] -// GFX12XX: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0xa1,0xd5,0xa3,0x00,0x00,0x00] // GFX11: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0xa1,0xd5,0xa3,0x00,0x00,0x00] +// GFX12XX: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0xa1,0xd5,0xa3,0x00,0x00,0x00] +// GFX89: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x5c,0xd1,0xa3,0x00,0x00,0x00] +// SICI: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x42,0xd3,0xa3,0x00,0x00,0x00] v_fract_f64_e64 v[0:1], 35 -// SICI: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x7c,0xd3,0xa3,0x00,0x00,0x00] -// GFX89: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x72,0xd1,0xa3,0x00,0x00,0x00] -// GFX12XX: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0xbe,0xd5,0xa3,0x00,0x00,0x00] // GFX11: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0xbe,0xd5,0xa3,0x00,0x00,0x00] +// GFX12XX: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0xbe,0xd5,0xa3,0x00,0x00,0x00] +// GFX89: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x72,0xd1,0xa3,0x00,0x00,0x00] +// SICI: v_fract_f64_e64 v[0:1], 35 ; encoding: 
[0x00,0x00,0x7c,0xd3,0xa3,0x00,0x00,0x00] v_trunc_f32_e32 v0, 1234 -// SICI: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00] -// GFX89: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x38,0x00,0x7e,0xd2,0x04,0x00,0x00] -// GFX12XX: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00] // GFX11: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00] +// GFX12XX: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00] +// GFX89: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x38,0x00,0x7e,0xd2,0x04,0x00,0x00] +// SICI: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00] v_fract_f64_e32 v[0:1], 1234 -// SICI: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00] -// GFX89: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x64,0x00,0x7e,0xd2,0x04,0x00,0x00] -// GFX12XX: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00] // GFX11: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00] +// GFX12XX: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00] +// GFX89: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x64,0x00,0x7e,0xd2,0x04,0x00,0x00] +// SICI: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00] v_trunc_f32_e64 v0, 1234 +// GFX11: v_trunc_f32_e64 v0, 0x4d2 ; encoding: [0x00,0x00,0xa1,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00] // GFX12XX: v_trunc_f32_e64 v0, 0x4d2 ; encoding: [0x00,0x00,0xa1,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00] -// NOSICI: :[[@LINE-2]]:21: error: literal operands are not supported // NOGFX89: :[[@LINE-3]]:21: error: literal operands are not supported -// GFX11: v_trunc_f32_e64 v0, 0x4d2 ; encoding: [0x00,0x00,0xa1,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00] +// NOSICI: :[[@LINE-4]]:21: error: literal operands are not supported // NOSICIVI: :[[@LINE-1]]:21: error: literal operands are not supported v_fract_f64_e64 v[0:1], 1234 +// GFX11: v_fract_f64_e64 v[0:1], 0x4d2 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00] // GFX12XX: v_fract_f64_e64 v[0:1], 0x4d2 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00] -// NOSICI: :[[@LINE-2]]:25: error: literal operands are not supported // NOGFX89: :[[@LINE-3]]:25: error: literal operands are not supported -// GFX11: v_fract_f64_e64 v[0:1], 0x4d2 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00] +// NOSICI: :[[@LINE-4]]:25: error: literal operands are not supported // NOSICIVI: :[[@LINE-1]]:25: error: literal operands are not supported v_trunc_f32_e32 v0, -54321 -// SICI: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff] -// GFX89: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x38,0x00,0x7e,0xcf,0x2b,0xff,0xff] -// GFX12XX: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff] // GFX11: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// GFX12XX: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// GFX89: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x38,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// SICI: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff] v_fract_f64_e32 v[0:1], -54321 -// SICI: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff] -// GFX89: 
v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x64,0x00,0x7e,0xcf,0x2b,0xff,0xff] -// GFX12XX: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff] // GFX11: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// GFX12XX: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// GFX89: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x64,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// SICI: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff] v_trunc_f32_e32 v0, 0xdeadbeef -// SICI: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde] -// GFX89: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x38,0x00,0x7e,0xef,0xbe,0xad,0xde] -// GFX12XX: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde] // GFX11: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde] +// GFX12XX: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde] +// GFX89: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x38,0x00,0x7e,0xef,0xbe,0xad,0xde] +// SICI: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde] v_fract_f64_e32 v[0:1], 0xdeadbeef -// SICI: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde] -// GFX89: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x64,0x00,0x7e,0xef,0xbe,0xad,0xde] -// GFX12XX: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde] // GFX11: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde] +// GFX12XX: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde] +// GFX89: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x64,0x00,0x7e,0xef,0xbe,0xad,0xde] +// SICI: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde] v_trunc_f32_e32 v0, 0xffffffff -// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e] -// GFX12XX: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] // GFX11: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e] +// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] v_fract_f64_e32 v[0:1], 0xffffffff -// SICI: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff] -// GFX89: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xff,0xff] -// GFX12XX: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff] // GFX11: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff] +// GFX12XX: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff] +// GFX89: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xff,0xff] +// SICI: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff] v_trunc_f32_e32 v0, 0x123456789abcdef0 // NOGCN: :[[@LINE-1]]:21: error: invalid operand for instruction v_fract_f64_e32 v[0:1], 0x123456789abcdef0 -// NOSICI: :[[@LINE-1]]:25: error: invalid operand for instruction -// NOGFX89: :[[@LINE-2]]:25: error: invalid operand for 
instruction // GFX1250: v_fract_f64_e32 v[0:1], 0x123456789abcdef0 ; encoding: [0xfe,0x7c,0x00,0x7e,0xf0,0xde,0xbc,0x9a,0x78,0x56,0x34,0x12] -// NOGFX11: :[[@LINE-4]]:25: error: invalid operand for instruction -// NOGFX12: :[[@LINE-5]]:25: error: invalid operand for instruction +// NOGFX11: :[[@LINE-2]]:25: error: invalid operand for instruction +// NOGFX12: :[[@LINE-3]]:25: error: invalid operand for instruction +// NOGFX89: :[[@LINE-4]]:25: error: invalid operand for instruction +// NOSICI: :[[@LINE-5]]:25: error: invalid operand for instruction // NOSICIVI: :[[@LINE-1]]:25: error: invalid operand for instruction v_trunc_f32_e32 v0, 0xffffffffffffffff -// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e] -// GFX12XX: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] // GFX11: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e] +// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] v_fract_f64_e32 v[0:1], 0xffffffffffffffff -// SICI: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x64,0x00,0x7e] -// GFX12XX: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e] // GFX11: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x64,0x00,0x7e] +// SICI: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e] v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1 ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0x06,0x1a] -// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode -// NOGFX1250: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode +// NOGFX12: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], lit(1) -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU -// NOGFX11: :[[@LINE-3]]:54: error: invalid operand for instruction -// NOGFX12: :[[@LINE-4]]:54: error: invalid operand for instruction -// NOGFX1250: :[[@LINE-5]]:54: error: invalid operand for instruction +// NOGFX11: :[[@LINE-1]]:54: error: invalid operand for instruction +// NOGFX12: :[[@LINE-2]]:54: error: invalid operand for instruction +// NOGFX1250: :[[@LINE-3]]:54: error: invalid operand for instruction +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_cos_f16_e32 v5.l, 1 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode // GFX11: v_cos_f16_e32 v5.l, 1 ; encoding: [0x81,0xc2,0x0a,0x7e] // GFX1250: v_cos_f16_e32 v5.l, 1 ; encoding: [0x81,0xc2,0x0a,0x7e] -// 
NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode +// NOGFX12: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode +// NOGFX89: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_cos_f16_e32 v5.l, lit(1) -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode // GFX11: v_cos_f16_e32 v5.l, lit(0x1) ; encoding: [0xff,0xc2,0x0a,0x7e,0x01,0x00,0x00,0x00] // GFX1250: v_cos_f16_e32 v5.l, lit(0x1) ; encoding: [0xff,0xc2,0x0a,0x7e,0x01,0x00,0x00,0x00] -// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode +// NOGFX12: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode +// NOGFX89: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_tanh_bf16 v5, 1 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX1250: v_tanh_bf16_e32 v5, 1 ; encoding: [0x81,0x94,0x0a,0x7e] -// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_tanh_bf16 v5, lit(1) -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX1250: v_tanh_bf16_e32 v5, lit(0x1) ; encoding: [0xff,0x94,0x0a,0x7e,0x01,0x00,0x00,0x00] -// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_trunc_f32_e32 v0, 1 -// SICI: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x38,0x00,0x7e] -// GFX12XX: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x42,0x00,0x7e] // GFX11: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x42,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x38,0x00,0x7e] +// SICI: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x42,0x00,0x7e] v_trunc_f32_e32 v0, lit(1) -// SICI: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00] -// GFX89: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x38,0x00,0x7e,0x01,0x00,0x00,0x00] -// GFX12XX: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00] // GFX11: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00] +// GFX12XX: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00] +// GFX89: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x38,0x00,0x7e,0x01,0x00,0x00,0x00] +// SICI: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00] 
v_dot2_bf16_bf16 v5.l, v1, v2, 1 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, 1 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x06,0x02] -// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode -// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_dot2_bf16_bf16 v5.l, v1, v2, lit(1) -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, lit(0x1) ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0x01,0x00,0x00,0x00] -// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode -// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_dot2_f32_f16 v5, v1, 1, v2 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_dot2_f32_f16 v5, v1, 1, v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0x03,0x09,0x1c] // GFX12: v_dot2_f32_f16 v5, v1, 1, v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0x03,0x09,0x1c] -// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_dot2_f32_f16 v5, v1, lit(1), v2 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_dot2_f32_f16 v5, v1, lit(0x1), v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x01,0x00,0x00,0x00] // GFX12: v_dot2_f32_f16 v5, v1, lit(0x1), v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x01,0x00,0x00,0x00] -// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_cvt_pk_fp8_f16 v1.l, 1 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX1250: v_cvt_pk_fp8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00] -// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: 
:[[@LINE-5]]:1: error: instruction not supported on this GPU v_cvt_pk_fp8_f16 v1.l, lit(1) -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX1250: v_cvt_pk_fp8_f16 v1.l, lit(0x1) ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00] -// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU //---------------------------------------------------------------------------// // int literal, expected int operand @@ -755,111 +755,111 @@ s_mov_b64_e32 s[0:1], 0 // SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe] v_and_b32_e32 v0, 0, v1 -// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26] -// GFX12XX: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] // GFX11: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] +// GFX12XX: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26] +// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] v_and_b32_e64 v0, 0, v1 -// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00] -// GFX12XX: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00] // GFX11: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00] +// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00] s_mov_b64_e32 s[0:1], -13 // GFX8PLUS: s_mov_b64 s[0:1], -13 ; encoding: [0xcd,0x01,0x80,0xbe] // SICI: s_mov_b64 s[0:1], -13 ; encoding: [0xcd,0x04,0x80,0xbe] v_and_b32_e32 v0, -13, v1 -// SICI: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x26] -// GFX12XX: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36] // GFX11: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36] +// GFX12XX: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x26] +// SICI: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36] v_and_b32_e64 v0, -13, v1 -// SICI: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xcd,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xcd,0x02,0x02,0x00] -// GFX12XX: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xcd,0x02,0x02,0x00] // GFX11: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xcd,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xcd,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xcd,0x02,0x02,0x00] +// SICI: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xcd,0x02,0x02,0x00] s_mov_b64_e32 s[0:1], 
35 // GFX8PLUS: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x01,0x80,0xbe] // SICI: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x04,0x80,0xbe] v_and_b32_e32 v0, 35, v1 -// SICI: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x26] -// GFX12XX: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36] // GFX11: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36] +// GFX12XX: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x26] +// SICI: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36] v_and_b32_e64 v0, 35, v1 -// SICI: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xa3,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xa3,0x02,0x02,0x00] -// GFX12XX: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xa3,0x02,0x02,0x00] // GFX11: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xa3,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xa3,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xa3,0x02,0x02,0x00] +// SICI: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xa3,0x02,0x02,0x00] s_mov_b64_e32 s[0:1], 1234 // GFX8PLUS: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x01,0x80,0xbe,0xd2,0x04,0x00,0x00] // SICI: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x04,0x80,0xbe,0xd2,0x04,0x00,0x00] v_and_b32_e32 v0, 1234, v1 -// SICI: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00] -// GFX89: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x26,0xd2,0x04,0x00,0x00] -// GFX12XX: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00] // GFX11: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00] +// GFX12XX: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00] +// GFX89: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x26,0xd2,0x04,0x00,0x00] +// SICI: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00] v_and_b32_e64 v0, 1234, v1 +// GFX11: v_and_b32_e64 v0, 0x4d2, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xff,0x02,0x02,0x00,0xd2,0x04,0x00,0x00] // GFX12XX: v_and_b32_e64 v0, 0x4d2, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xff,0x02,0x02,0x00,0xd2,0x04,0x00,0x00] -// NOSICI: :[[@LINE-2]]:19: error: literal operands are not supported // NOGFX89: :[[@LINE-3]]:19: error: literal operands are not supported -// GFX11: v_and_b32_e64 v0, 0x4d2, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xff,0x02,0x02,0x00,0xd2,0x04,0x00,0x00] +// NOSICI: :[[@LINE-4]]:19: error: literal operands are not supported // NOSICIVI: :[[@LINE-1]]:19: error: literal operands are not supported s_mov_b64_e32 s[0:1], -54321 -// SICI: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x04,0x80,0xbe,0xcf,0x2b,0xff,0xff] -// GFX89: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff] // GFX11: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff] // GFX12: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff] // GFX1250: s_mov_b64 s[0:1], 0xffffffffffff2bcf ; encoding: [0xfe,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff,0xff,0xff,0xff,0xff] +// GFX89: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff] +// SICI: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x04,0x80,0xbe,0xcf,0x2b,0xff,0xff] 
v_and_b32_e32 v0, -54321, v1 -// SICI: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff] -// GFX89: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x26,0xcf,0x2b,0xff,0xff] -// GFX12XX: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff] // GFX11: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff] +// GFX12XX: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff] +// GFX89: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x26,0xcf,0x2b,0xff,0xff] +// SICI: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff] s_mov_b64_e32 s[0:1], 0xdeadbeef -// SICI: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x04,0x80,0xbe,0xef,0xbe,0xad,0xde] -// GFX89: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde] // GFX11: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde] // GFX12: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde] // GFX1250: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xfe,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde,0x00,0x00,0x00,0x00] +// GFX89: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde] +// SICI: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x04,0x80,0xbe,0xef,0xbe,0xad,0xde] v_and_b32_e32 v0, 0xdeadbeef, v1 -// SICI: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde] -// GFX89: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x26,0xef,0xbe,0xad,0xde] -// GFX12XX: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde] // GFX11: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde] +// GFX12XX: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde] +// GFX89: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x26,0xef,0xbe,0xad,0xde] +// SICI: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde] s_mov_b64_e32 s[0:1], 0xffffffff -// SICI: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x04,0x80,0xbe,0xff,0xff,0xff,0xff] -// GFX89: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff] // GFX11: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff] // GFX12: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff] // GFX1250: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00] +// GFX89: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff] +// SICI: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x04,0x80,0xbe,0xff,0xff,0xff,0xff] v_and_b32_e32 v0, 0xffffffff, v1 -// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26] -// GFX12XX: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] // GFX11: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] +// GFX12XX: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26] +// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] s_mov_b64_e32 s[0:1], 0x123456789abcdef0 -// NOSICI: :[[@LINE-1]]:23: error: invalid operand for instruction -// NOGFX89: :[[@LINE-2]]:23: error: invalid operand for instruction 
// GFX1250: s_mov_b64 s[0:1], 0x123456789abcdef0 ; encoding: [0xfe,0x01,0x80,0xbe,0xf0,0xde,0xbc,0x9a,0x78,0x56,0x34,0x12] -// NOGFX11: :[[@LINE-4]]:23: error: invalid operand for instruction -// NOGFX12: :[[@LINE-5]]:23: error: invalid operand for instruction +// NOGFX11: :[[@LINE-2]]:23: error: invalid operand for instruction +// NOGFX12: :[[@LINE-3]]:23: error: invalid operand for instruction +// NOGFX89: :[[@LINE-4]]:23: error: invalid operand for instruction +// NOSICI: :[[@LINE-5]]:23: error: invalid operand for instruction // NOSICIVI: :[[@LINE-1]]:23: error: invalid operand for instruction v_and_b32_e32 v0, 0x123456789abcdef0, v1 @@ -870,75 +870,75 @@ s_mov_b64_e32 s[0:1], 0xffffffffffffffff // SICI: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x04,0x80,0xbe] v_and_b32_e32 v0, 0xffffffffffffffff, v1 -// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26] -// GFX12XX: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] // GFX11: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] +// GFX12XX: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26] +// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] v_not_b16 v5.l, 1 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_not_b16_e32 v5.l, 1 ; encoding: [0x81,0xd2,0x0a,0x7e] // GFX1250: v_not_b16_e32 v5.l, 1 ; encoding: [0x81,0xd2,0x0a,0x7e] -// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode +// NOGFX12: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_not_b16 v5.l, lit(1) -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_not_b16_e32 v5.l, lit(0x1) ; encoding: [0xff,0xd2,0x0a,0x7e,0x01,0x00,0x00,0x00] // GFX1250: v_not_b16_e32 v5.l, lit(0x1) ; encoding: [0xff,0xd2,0x0a,0x7e,0x01,0x00,0x00,0x00] -// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode +// NOGFX12: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU s_mov_b64 s[0:1], 1 // GFX8PLUS: s_mov_b64 s[0:1], 1 ; encoding: [0x81,0x01,0x80,0xbe] // SICI: s_mov_b64 s[0:1], 1 ; encoding: [0x81,0x04,0x80,0xbe] s_mov_b64 s[0:1], lit(1) -// SICI: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x04,0x80,0xbe,0x01,0x00,0x00,0x00] -// GFX89: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00] // GFX11: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00] // GFX12: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00] // GFX1250: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +// GFX89: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00] +// SICI: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x04,0x80,0xbe,0x01,0x00,0x00,0x00] v_and_b32_e32 v0, 1, v1 -// SICI: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, 1, v1 ; encoding: 
[0x81,0x02,0x00,0x26] -// GFX12XX: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x36] // GFX11: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x36] +// GFX12XX: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x26] +// SICI: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x36] v_and_b32_e32 v0, lit(1), v1 -// SICI: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00] -// GFX89: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x26,0x01,0x00,0x00,0x00] -// GFX12XX: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00] // GFX11: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00] +// GFX12XX: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00] +// GFX89: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x26,0x01,0x00,0x00,0x00] +// SICI: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00] v_pk_add_u16 v5, exec_lo, 1 +// GFX11: v_pk_add_u16 v5, exec_lo, 1 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0x02,0x01,0x18] // GFX12XX: v_pk_add_u16 v5, exec_lo, 1 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0x02,0x01,0x18] -// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX9: v_pk_add_u16 v5, exec_lo, 1 ; encoding: [0x05,0x40,0x8a,0xd3,0x7e,0x02,0x01,0x18] -// GFX11: v_pk_add_u16 v5, exec_lo, 1 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0x02,0x01,0x18] +// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU // NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_pk_add_u16 v5, exec_lo, lit(1) -// GFX12XX: v_pk_add_u16 v5, exec_lo, lit(0x1) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x01,0x00,0x00,0x00] -// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX11: v_pk_add_u16 v5, exec_lo, lit(0x1) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x01,0x00,0x00,0x00] -// NOVI: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX9: :[[@LINE-5]]:31: error: invalid operand (violates constant bus restrictions) +// GFX12XX: v_pk_add_u16 v5, exec_lo, lit(0x1) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x01,0x00,0x00,0x00] +// NOGFX9: :[[@LINE-3]]:31: error: invalid operand (violates constant bus restrictions) +// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1 -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0x06,0x02] -// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(1) -// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU // GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(0x1) ; encoding: 
[0x02,0x00,0x42,0xd6,0x04,0x09,0xfe,0x03,0x01,0x00,0x00,0x00] -// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU //---------------------------------------------------------------------------// // 1/(2*PI) @@ -948,46 +948,46 @@ v_trunc_f32_e32 v0, 0x3fc45f306dc9c882 // NOGCN: :[[@LINE-1]]:21: error: invalid operand for instruction v_fract_f64_e32 v[0:1], 0x3fc45f306dc9c882 -// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e] -// GFX12XX: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e] -// NOSICI: :[[@LINE-3]]:25: error: invalid operand for instruction // GFX11: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e] +// NOSICI: :[[@LINE-4]]:25: error: invalid operand for instruction // NOSICIVI: :[[@LINE-2]]:25: error: invalid operand for instruction v_trunc_f32_e32 v0, 0x3e22f983 -// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] -// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e] -// GFX12XX: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e] // GFX11: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e] +// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] v_fract_f64_e32 v[0:1], 0x3e22f983 -// SICI: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e] -// GFX89: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x64,0x00,0x7e,0x83,0xf9,0x22,0x3e] -// GFX12XX: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e] // GFX11: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// GFX12XX: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// GFX89: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x64,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// SICI: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e] v_trunc_f32_e64 v0, 0x3fc45f306dc9c882 // NOGCN: :[[@LINE-1]]:21: error: invalid operand for instruction v_fract_f64_e64 v[0:1], 0x3fc45f306dc9c882 -// GFX89: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00] -// GFX12XX: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0xbe,0xd5,0xf8,0x00,0x00,0x00] -// NOSICI: :[[@LINE-3]]:25: error: invalid operand for instruction // GFX11: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0xbe,0xd5,0xf8,0x00,0x00,0x00] +// GFX12XX: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0xbe,0xd5,0xf8,0x00,0x00,0x00] +// GFX89: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00] +// NOSICI: :[[@LINE-4]]:25: 
error: invalid operand for instruction // NOSICIVI: :[[@LINE-2]]:25: error: invalid operand for instruction v_trunc_f32_e64 v0, 0x3e22f983 -// GFX89: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00] -// GFX12XX: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0xa1,0xd5,0xf8,0x00,0x00,0x00] -// NOSICI: :[[@LINE-3]]:21: error: literal operands are not supported // GFX11: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0xa1,0xd5,0xf8,0x00,0x00,0x00] +// GFX12XX: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0xa1,0xd5,0xf8,0x00,0x00,0x00] +// GFX89: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00] +// NOSICI: :[[@LINE-4]]:21: error: literal operands are not supported // NOSICIVI: :[[@LINE-2]]:21: error: literal operands are not supported v_fract_f64_e64 v[0:1], 0x3e22f983 +// GFX11: v_fract_f64_e64 v[0:1], 0x3e22f983 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0x83,0xf9,0x22,0x3e] // GFX12XX: v_fract_f64_e64 v[0:1], 0x3e22f983 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0x83,0xf9,0x22,0x3e] -// NOSICI: :[[@LINE-2]]:25: error: literal operands are not supported // NOGFX89: :[[@LINE-3]]:25: error: literal operands are not supported -// GFX11: v_fract_f64_e64 v[0:1], 0x3e22f983 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0x83,0xf9,0x22,0x3e] +// NOSICI: :[[@LINE-4]]:25: error: literal operands are not supported // NOSICIVI: :[[@LINE-1]]:25: error: literal operands are not supported s_mov_b64_e32 s[0:1], 0.159154943091895317852646485335 @@ -996,37 +996,37 @@ s_mov_b64_e32 s[0:1], 0.159154943091895317852646485335 // NOSICIVI: :[[@LINE-2]]:23: error: invalid operand for instruction v_and_b32_e32 v0, 0.159154943091895317852646485335, v1 -// SICI: v_and_b32_e32 v0, 0x3e22f983, v1 ; encoding: [0xff,0x02,0x00,0x36,0x83,0xf9,0x22,0x3e] -// GFX89: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x26] -// GFX12XX: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x36] // GFX11: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x36] +// GFX12XX: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x26] +// SICI: v_and_b32_e32 v0, 0x3e22f983, v1 ; encoding: [0xff,0x02,0x00,0x36,0x83,0xf9,0x22,0x3e] v_and_b32_e64 v0, 0.159154943091895317852646485335, v1 -// GFX89: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00] -// GFX12XX: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf8,0x02,0x02,0x00] -// NOSICI: :[[@LINE-3]]:19: error: literal operands are not supported // GFX11: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf8,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf8,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00] +// NOSICI: :[[@LINE-4]]:19: error: literal operands are not supported // NOSICIVI: :[[@LINE-2]]:19: error: literal operands are not supported v_fract_f64 v[0:1], 0.159154943091895317852646485335 -// SICI: v_fract_f64_e32 v[0:1], 0x3fc45f30 ; encoding: [0xff,0x7c,0x00,0x7e,0x30,0x5f,0xc4,0x3f] -// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e] +// GFX11: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e] // GFX12XX: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e] +// GFX89: 
v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e] // NOSICI: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// GFX11: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e] +// SICI: v_fract_f64_e32 v[0:1], 0x3fc45f30 ; encoding: [0xff,0x7c,0x00,0x7e,0x30,0x5f,0xc4,0x3f] // NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero v_trunc_f32 v0, 0.159154943091895317852646485335 -// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] -// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e] -// GFX12XX: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e] // GFX11: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e] +// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] v_trunc_f32 v0, lit(0.159154943091895317852646485335) -// SICI: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] -// GFX89: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x38,0x00,0x7e,0x83,0xf9,0x22,0x3e] -// GFX12XX: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] // GFX11: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// GFX12XX: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// GFX89: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x38,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// SICI: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] //---------------------------------------------------------------------------// // integer literal truncation checks @@ -1051,54 +1051,54 @@ v_trunc_f32 v0, 0x1fffffff000 // NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction s_mov_b64 s[0:1], 0x101ffffffff -// NOSICI: :[[@LINE-1]]:19: error: invalid operand for instruction -// NOGFX89: :[[@LINE-2]]:19: error: invalid operand for instruction // GFX1250: s_mov_b64 s[0:1], 0x101ffffffff ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x01,0x01,0x00,0x00] -// NOGFX11: :[[@LINE-4]]:19: error: invalid operand for instruction -// NOGFX12: :[[@LINE-5]]:19: error: invalid operand for instruction +// NOGFX11: :[[@LINE-2]]:19: error: invalid operand for instruction +// NOGFX12: :[[@LINE-3]]:19: error: invalid operand for instruction +// NOGFX89: :[[@LINE-4]]:19: error: invalid operand for instruction +// NOSICI: :[[@LINE-5]]:19: error: invalid operand for instruction // NOSICIVI: :[[@LINE-1]]:19: error: invalid operand for instruction s_mov_b64 s[0:1], 0x1000000001 -// NOSICI: :[[@LINE-1]]:19: error: invalid operand for instruction -// NOGFX89: :[[@LINE-2]]:19: error: invalid operand for instruction // GFX1250: s_mov_b64 s[0:1], 0x1000000001 ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x10,0x00,0x00,0x00] -// NOGFX11: :[[@LINE-4]]:19: error: invalid operand for instruction -// NOGFX12: :[[@LINE-5]]:19: error: invalid operand for instruction +// NOGFX11: :[[@LINE-2]]:19: error: invalid operand for instruction +// NOGFX12: :[[@LINE-3]]:19: error: invalid operand for instruction +// NOGFX89: :[[@LINE-4]]:19: error: invalid operand for instruction +// 
NOSICI: :[[@LINE-5]]:19: error: invalid operand for instruction // NOSICIVI: :[[@LINE-1]]:19: error: invalid operand for instruction s_mov_b64 s[0:1], 0x1000000fff -// NOSICI: :[[@LINE-1]]:19: error: invalid operand for instruction -// NOGFX89: :[[@LINE-2]]:19: error: invalid operand for instruction // GFX1250: s_mov_b64 s[0:1], 0x1000000fff ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0x0f,0x00,0x00,0x10,0x00,0x00,0x00] -// NOGFX11: :[[@LINE-4]]:19: error: invalid operand for instruction -// NOGFX12: :[[@LINE-5]]:19: error: invalid operand for instruction +// NOGFX11: :[[@LINE-2]]:19: error: invalid operand for instruction +// NOGFX12: :[[@LINE-3]]:19: error: invalid operand for instruction +// NOGFX89: :[[@LINE-4]]:19: error: invalid operand for instruction +// NOSICI: :[[@LINE-5]]:19: error: invalid operand for instruction // NOSICIVI: :[[@LINE-1]]:19: error: invalid operand for instruction v_trunc_f64 v[0:1], 0x1fffffffff0 -// NOGFX89: :[[@LINE-1]]:21: error: invalid operand for instruction // GFX1250: v_trunc_f64_e32 v[0:1], 0x1fffffffff0 ; encoding: [0xfe,0x2e,0x00,0x7e,0xf0,0xff,0xff,0xff,0xff,0x01,0x00,0x00] -// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU -// NOCI: :[[@LINE-4]]:21: error: invalid operand for instruction -// NOGFX11: :[[@LINE-5]]:21: error: invalid operand for instruction -// NOGFX12: :[[@LINE-6]]:21: error: invalid operand for instruction +// NOCI: :[[@LINE-2]]:21: error: invalid operand for instruction +// NOGFX11: :[[@LINE-3]]:21: error: invalid operand for instruction +// NOGFX12: :[[@LINE-4]]:21: error: invalid operand for instruction +// NOGFX89: :[[@LINE-5]]:21: error: invalid operand for instruction +// NOSI: :[[@LINE-6]]:1: error: instruction not supported on this GPU // NOCIVI: :[[@LINE-4]]:21: error: invalid operand for instruction v_trunc_f64 v[0:1], 0x100000001 -// NOGFX89: :[[@LINE-1]]:21: error: invalid operand for instruction // GFX1250: v_trunc_f64_e32 v[0:1], 0x100000001 ; encoding: [0xfe,0x2e,0x00,0x7e,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00] -// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU -// NOCI: :[[@LINE-4]]:21: error: invalid operand for instruction -// NOGFX11: :[[@LINE-5]]:21: error: invalid operand for instruction -// NOGFX12: :[[@LINE-6]]:21: error: invalid operand for instruction +// NOCI: :[[@LINE-2]]:21: error: invalid operand for instruction +// NOGFX11: :[[@LINE-3]]:21: error: invalid operand for instruction +// NOGFX12: :[[@LINE-4]]:21: error: invalid operand for instruction +// NOGFX89: :[[@LINE-5]]:21: error: invalid operand for instruction +// NOSI: :[[@LINE-6]]:1: error: instruction not supported on this GPU // NOCIVI: :[[@LINE-4]]:21: error: invalid operand for instruction v_trunc_f64 v[0:1], 0x1fffffff000 -// NOGFX89: :[[@LINE-1]]:21: error: invalid operand for instruction // GFX1250: v_trunc_f64_e32 v[0:1], 0x1fffffff000 ; encoding: [0xfe,0x2e,0x00,0x7e,0x00,0xf0,0xff,0xff,0xff,0x01,0x00,0x00] -// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU -// NOCI: :[[@LINE-4]]:21: error: invalid operand for instruction -// NOGFX11: :[[@LINE-5]]:21: error: invalid operand for instruction -// NOGFX12: :[[@LINE-6]]:21: error: invalid operand for instruction +// NOCI: :[[@LINE-2]]:21: error: invalid operand for instruction +// NOGFX11: :[[@LINE-3]]:21: error: invalid operand for instruction +// NOGFX12: :[[@LINE-4]]:21: error: invalid operand for instruction +// NOGFX89: :[[@LINE-5]]:21: error: invalid operand for instruction +// NOSI: :[[@LINE-6]]:1: error: instruction not 
supported on this GPU // NOCIVI: :[[@LINE-4]]:21: error: invalid operand for instruction //---------------------------------------------------------------------------// @@ -1106,210 +1106,210 @@ v_trunc_f64 v[0:1], 0x1fffffff000 //---------------------------------------------------------------------------// buffer_atomic_add v0, off, s[0:3], scc offset:4095 -// SICI: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xc8,0xe0,0x00,0x00,0x00,0xfd] -// GFX89: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xfd] -// GFX12XX: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0x7d,0x40,0x0d,0xc4,0x00,0x00,0x80,0x00,0x00,0xff,0x0f,0x00] // GFX11: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xd4,0xe0,0x00,0x00,0x00,0xfd] +// GFX12XX: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0x7d,0x40,0x0d,0xc4,0x00,0x00,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX89: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xfd] +// SICI: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xc8,0xe0,0x00,0x00,0x00,0xfd] s_add_i32 s0, vccz, s0 -// SICI: s_add_i32 s0, src_vccz, s0 ; encoding: [0xfb,0x00,0x00,0x81] // GFX89: s_add_i32 s0, src_vccz, s0 ; encoding: [0xfb,0x00,0x00,0x81] -// NOGFX11: :[[@LINE-3]]:15: error: src_vccz register not available on this GPU -// NOGFX12: :[[@LINE-4]]:15: error: src_vccz register not available on this GPU -// NOGFX1250: :[[@LINE-5]]:15: error: src_vccz register not available on this GPU +// NOGFX11: :[[@LINE-2]]:15: error: src_vccz register not available on this GPU +// NOGFX12: :[[@LINE-3]]:15: error: src_vccz register not available on this GPU +// NOGFX1250: :[[@LINE-4]]:15: error: src_vccz register not available on this GPU +// SICI: s_add_i32 s0, src_vccz, s0 ; encoding: [0xfb,0x00,0x00,0x81] s_add_i32 s0, execz, s0 -// SICI: s_add_i32 s0, src_execz, s0 ; encoding: [0xfc,0x00,0x00,0x81] // GFX89: s_add_i32 s0, src_execz, s0 ; encoding: [0xfc,0x00,0x00,0x81] -// NOGFX11: :[[@LINE-3]]:15: error: src_execz register not available on this GPU -// NOGFX12: :[[@LINE-4]]:15: error: src_execz register not available on this GPU -// NOGFX1250: :[[@LINE-5]]:15: error: src_execz register not available on this GPU +// NOGFX11: :[[@LINE-2]]:15: error: src_execz register not available on this GPU +// NOGFX12: :[[@LINE-3]]:15: error: src_execz register not available on this GPU +// NOGFX1250: :[[@LINE-4]]:15: error: src_execz register not available on this GPU +// SICI: s_add_i32 s0, src_execz, s0 ; encoding: [0xfc,0x00,0x00,0x81] s_add_i32 s0, scc, s0 -// SICI: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81] -// GFX89: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81] -// GFX12XX: s_add_co_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81] // GFX11: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81] +// GFX12XX: s_add_co_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81] +// GFX89: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81] +// SICI: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81] s_and_b64 s[0:1], s[0:1], src_vccz -// SICI: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x87] // GFX89: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x86] -// NOGFX11: :[[@LINE-3]]:27: error: src_vccz register not available on this GPU -// NOGFX12: :[[@LINE-4]]:27: error: 
src_vccz register not available on this GPU
-// NOGFX1250: :[[@LINE-5]]:27: error: src_vccz register not available on this GPU
+// NOGFX11: :[[@LINE-2]]:27: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-3]]:27: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-4]]:27: error: src_vccz register not available on this GPU
+// SICI: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x87]

s_and_b64 s[0:1], s[0:1], src_execz
-// SICI: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x87]
// GFX89: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x86]
-// NOGFX11: :[[@LINE-3]]:27: error: src_execz register not available on this GPU
-// NOGFX12: :[[@LINE-4]]:27: error: src_execz register not available on this GPU
-// NOGFX1250: :[[@LINE-5]]:27: error: src_execz register not available on this GPU
+// NOGFX11: :[[@LINE-2]]:27: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-3]]:27: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-4]]:27: error: src_execz register not available on this GPU
+// SICI: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x87]

s_and_b64 s[0:1], s[0:1], src_scc
-// SICI: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x87]
-// GFX89: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x86]
-// GFX12XX: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x8b]
// GFX11: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x8b]
+// GFX12XX: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x8b]
+// GFX89: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x86]
+// SICI: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x87]

v_add_u16 v0, vccz, v0
// GFX89: v_add_u16_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x4c]
-// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
-// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_add_u16_sdwa v0, scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
// GFX9: v_add_u16_sdwa v0, src_scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xfd,0x06,0x86,0x06]
-// NOVI: :[[@LINE-3]]:20: error: invalid operand for instruction
-// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-6]]:20: error: invalid operand for instruction
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_add_u16_sdwa v0, v0, scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
// GFX9: v_add_u16_sdwa v0, v0, src_scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xfa,0x01,0x4c,0x00,0x06,0x06,0x86]
-// NOVI: :[[@LINE-3]]:24: error: invalid operand for instruction
-// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-6]]:24: error: invalid operand for instruction
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_add_u32 v0, execz, v0
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
// GFX9: v_add_u32_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x68]
-// NOVI: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode
-// NOGFX11: :[[@LINE-4]]:15: error: src_execz register not available on this GPU
-// NOGFX12: :[[@LINE-5]]:15: error: src_execz register not available on this GPU
-// NOGFX1250: :[[@LINE-6]]:15: error: src_execz register not available on this GPU
+// NOGFX11: :[[@LINE-2]]:15: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-3]]:15: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-4]]:15: error: src_execz register not available on this GPU
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-6]]:1: error: operands are not valid for this GPU or mode
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_add_u32_e64 v0, scc, v0
+// GFX11: v_add_nc_u32_e64 v0, src_scc, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x02,0x00]
// GFX12XX: v_add_nc_u32_e64 v0, src_scc, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x02,0x00]
-// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// GFX9: v_add_u32_e64 v0, src_scc, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xfd,0x00,0x02,0x00]
-// GFX11: v_add_nc_u32_e64 v0, src_scc, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x02,0x00]
+// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
// NOVI: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_cmp_eq_i64 vcc, scc, v[0:1]
-// SICI: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0x44,0x7d]
// GFX89: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0xc4,0x7d]
-// NOGFX11: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode
-// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
-// NOGFX1250: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+// NOGFX11: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+// NOGFX12: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// SICI: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0x44,0x7d]

v_max_f16 v0, execz, v0
// GFX89: v_max_f16_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x5a]
-// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
-// NOGFX11: :[[@LINE-3]]:15: error: src_execz register not available on this GPU
-// NOGFX12: :[[@LINE-4]]:15: error: src_execz register not available on this GPU
-// NOGFX1250: :[[@LINE-5]]:15: error: src_execz register not available on this GPU
+// NOGFX11: :[[@LINE-2]]:15: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-3]]:15: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-4]]:15: error: src_execz register not available on this GPU
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_max_f32 v0, vccz, v0
-// SICI: v_max_f32_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x20]
// GFX89: v_max_f32_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x16]
-// NOGFX11: :[[@LINE-3]]:15: error: src_vccz register not available on this GPU
-// NOGFX12: :[[@LINE-4]]:15: error: src_vccz register not available on this GPU
-// NOGFX1250: :[[@LINE-5]]:15: error: src_vccz register not available on this GPU
+// NOGFX11: :[[@LINE-2]]:15: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-3]]:15: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-4]]:15: error: src_vccz register not available on this GPU
+// SICI: v_max_f32_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x20]

v_max_f64 v[0:1], scc, v[0:1]
-// SICI: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0xce,0xd2,0xfd,0x00,0x02,0x00]
-// GFX89: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xfd,0x00,0x02,0x00]
-// GFX12XX: v_max_num_f64_e32 v[0:1], src_scc, v[0:1] ; encoding: [0xfd,0x00,0x00,0x1c]
// GFX11: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0x2a,0xd7,0xfd,0x00,0x02,0x00]
+// GFX12XX: v_max_num_f64_e32 v[0:1], src_scc, v[0:1] ; encoding: [0xfd,0x00,0x00,0x1c]
+// GFX89: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xfd,0x00,0x02,0x00]
+// SICI: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0xce,0xd2,0xfd,0x00,0x02,0x00]

v_pk_add_f16 v0, execz, v0
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
// GFX9: v_pk_add_f16 v0, src_execz, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xfc,0x00,0x02,0x18]
-// NOVI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX11: :[[@LINE-4]]:18: error: src_execz register not available on this GPU
-// NOGFX12: :[[@LINE-5]]:18: error: src_execz register not available on this GPU
-// NOGFX1250: :[[@LINE-6]]:18: error: src_execz register not available on this GPU
+// NOGFX11: :[[@LINE-2]]:18: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-3]]:18: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-4]]:18: error: src_execz register not available on this GPU
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_ceil_f16 v0, neg(vccz)
// GFX89: v_ceil_f16_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x85,0xd1,0xfb,0x00,0x00,0x20]
-// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
-// NOGFX11: :[[@LINE-3]]:20: error: src_vccz register not available on this GPU
-// NOGFX12: :[[@LINE-4]]:20: error: src_vccz register not available on this GPU
-// NOGFX1250: :[[@LINE-5]]:20: error: src_vccz register not available on this GPU
+// NOGFX11: :[[@LINE-2]]:20: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-3]]:20: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-4]]:20: error: src_vccz register not available on this GPU
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_ceil_f16 v0, abs(scc)
-// GFX89: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00]
-// GFX12XX: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
-// NOSICI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
// GFX11: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
+// GFX12XX: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
+// GFX89: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_ceil_f64 v[5:6], |execz|
-// GFX89: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x58,0xd1,0xfc,0x00,0x00,0x00]
// CI: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x30,0xd3,0xfc,0x00,0x00,0x00]
-// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX11: :[[@LINE-4]]:21: error: src_execz register not available on this GPU
-// NOGFX12: :[[@LINE-5]]:21: error: src_execz register not available on this GPU
-// NOGFX1250: :[[@LINE-6]]:21: error: src_execz register not available on this GPU
+// GFX89: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x58,0xd1,0xfc,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-3]]:21: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:21: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:21: error: src_execz register not available on this GPU
+// NOSI: :[[@LINE-6]]:1: error: instruction not supported on this GPU

v_ceil_f64 v[5:6], -vcc
-// GFX89: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x58,0xd1,0x6a,0x00,0x00,0x20]
// CI: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x30,0xd3,0x6a,0x00,0x00,0x20]
// GFX11: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x20]
// GFX12: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x20]
-// NOSI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-6]]:12: error: invalid operand for instruction
+// GFX89: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x58,0xd1,0x6a,0x00,0x00,0x20]
+// NOGFX1250: :[[@LINE-5]]:12: error: invalid operand for instruction
+// NOSI: :[[@LINE-6]]:1: error: instruction not supported on this GPU

v_ceil_f32 v0, -vccz
-// SICI: v_ceil_f32_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x44,0xd3,0xfb,0x00,0x00,0x20]
// GFX89: v_ceil_f32_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x5d,0xd1,0xfb,0x00,0x00,0x20]
-// NOGFX11: :[[@LINE-3]]:17: error: src_vccz register not available on this GPU
-// NOGFX12: :[[@LINE-4]]:17: error: src_vccz register not available on this GPU
-// NOGFX1250: :[[@LINE-5]]:17: error: src_vccz register not available on this GPU
+// NOGFX11: :[[@LINE-2]]:17: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-3]]:17: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-4]]:17: error: src_vccz register not available on this GPU
+// SICI: v_ceil_f32_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x44,0xd3,0xfb,0x00,0x00,0x20]

v_ceil_f32 v0, |execz|
-// SICI: v_ceil_f32_e64 v0, |src_execz| ; encoding: [0x00,0x01,0x44,0xd3,0xfc,0x00,0x00,0x00]
// GFX89: v_ceil_f32_e64 v0, |src_execz| ; encoding: [0x00,0x01,0x5d,0xd1,0xfc,0x00,0x00,0x00]
-// NOGFX11: :[[@LINE-3]]:17: error: src_execz register not available on this GPU
-// NOGFX12: :[[@LINE-4]]:17: error: src_execz register not available on this GPU
-// NOGFX1250: :[[@LINE-5]]:17: error: src_execz register not available on this GPU
+// NOGFX11: :[[@LINE-2]]:17: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-3]]:17: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-4]]:17: error: src_execz register not available on this GPU
+// SICI: v_ceil_f32_e64 v0, |src_execz| ; encoding: [0x00,0x01,0x44,0xd3,0xfc,0x00,0x00,0x00]

v_ceil_f16_sdwa v5, |vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
// GFX9: v_ceil_f16_sdwa v5, |src_vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfb,0x16,0xa6,0x00]
-// NOVI: :[[@LINE-3]]:22: error: invalid operand for instruction
-// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX11: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-3]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-6]]:22: error: invalid operand for instruction
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_ceil_f16_sdwa v5, -scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
// GFX9: v_ceil_f16_sdwa v5, -src_scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfd,0x16,0x96,0x00]
-// NOVI: :[[@LINE-3]]:22: error: invalid operand for instruction
-// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX11: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-3]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-6]]:22: error: invalid operand for instruction
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_ceil_f32_sdwa v5, vccz dst_sel:DWORD src0_sel:DWORD
-// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
// GFX9: v_ceil_f32_sdwa v5, src_vccz dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfb,0x16,0x86,0x00]
-// NOVI: :[[@LINE-3]]:21: error: invalid operand for instruction
-// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX11: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-3]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOSICI: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOVI: :[[@LINE-6]]:21: error: invalid operand for instruction
// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported

v_ceil_f32_sdwa v5, |execz| dst_sel:DWORD src0_sel:DWORD
-// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
// GFX9: v_ceil_f32_sdwa v5, |src_execz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfc,0x16,0xa6,0x00]
-// NOVI: :[[@LINE-3]]:22: error: invalid operand for instruction
-// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX11: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-3]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOSICI: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOVI: :[[@LINE-6]]:22: error: invalid operand for instruction
// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported

//---------------------------------------------------------------------------//
@@ -1317,266 +1317,266 @@ v_ceil_f32_sdwa v5, |execz| dst_sel:DWORD src0_sel:DWORD
//---------------------------------------------------------------------------//

buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095
-// NOSICI: :[[@LINE-1]]:36: error: src_shared_base register not available on this GPU
-// GFX9: buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xeb]
// GFX11: buffer_atomic_add_u32 v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0xd4,0xe0,0x00,0x00,0x00,0xeb]
-// NOVI: :[[@LINE-4]]:36: error: src_shared_base register not available on this GPU
-// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
-// NOGFX1250: :[[@LINE-6]]:1: error: operands are not valid for this GPU or mode
+// GFX9: buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xeb]
+// NOGFX12: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOSICI: :[[@LINE-5]]:36: error: src_shared_base register not available on this GPU
+// NOVI: :[[@LINE-6]]:36: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:36: error: src_shared_base register not available on this GPU

s_add_i32 s0, src_shared_base, s0
+// GFX11: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81]
// GFX12XX: s_add_co_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81]
-// NOSICI: :[[@LINE-2]]:15: error: src_shared_base register not available on this GPU
// GFX9: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81]
-// GFX11: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81]
+// NOSICI: :[[@LINE-4]]:15: error: src_shared_base register not available on this GPU
// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:15: error: src_shared_base register not available on this GPU

s_add_i32 s0, src_shared_limit, s0
+// GFX11: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81]
// GFX12XX: s_add_co_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81]
-// NOSICI: :[[@LINE-2]]:15: error: src_shared_limit register not available on this GPU
// GFX9: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81]
-// GFX11: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81]
+// NOSICI: :[[@LINE-4]]:15: error: src_shared_limit register not available on this GPU
// NOVI: :[[@LINE-5]]:15: error: src_shared_limit register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:15: error: src_shared_limit register not available on this GPU

s_add_i32 s0, src_private_base, s0
+// GFX11: s_add_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81]
// GFX12XX: s_add_co_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81]
-// NOSICI: :[[@LINE-2]]:15: error: src_private_base register not available on this GPU
// GFX9: s_add_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81]
-// GFX11: s_add_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81]
+// NOSICI: :[[@LINE-4]]:15: error: src_private_base register not available on this GPU
// NOVI: :[[@LINE-5]]:15: error: src_private_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:15: error: src_private_base register not available on this GPU

s_add_i32 s0, src_private_limit, s0
+// GFX11: s_add_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81]
// GFX12XX: s_add_co_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81]
-// NOSICI: :[[@LINE-2]]:15: error: src_private_limit register not available on this GPU
// GFX9: s_add_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81]
-// GFX11: s_add_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81]
+// NOSICI: :[[@LINE-4]]:15: error: src_private_limit register not available on this GPU
// NOVI: :[[@LINE-5]]:15: error: src_private_limit register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:15: error: src_private_limit register not available on this GPU

s_add_i32 s0, src_pops_exiting_wave_id, s0
-// NOSICI: :[[@LINE-1]]:15: error: src_pops_exiting_wave_id register not available on this GPU
// GFX9: s_add_i32 s0, src_pops_exiting_wave_id, s0 ; encoding: [0xef,0x00,0x00,0x81]
-// NOVI: :[[@LINE-3]]:15: error: src_pops_exiting_wave_id register not available on this GPU
-// NOGFX11: :[[@LINE-4]]:15: error: src_pops_exiting_wave_id register not available on this GPU
-// NOGFX12: :[[@LINE-5]]:15: error: src_pops_exiting_wave_id register not available on this GPU
-// NOGFX1250: :[[@LINE-6]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX11: :[[@LINE-2]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX12: :[[@LINE-3]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX1250: :[[@LINE-4]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// NOSICI: :[[@LINE-5]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// NOVI: :[[@LINE-6]]:15: error: src_pops_exiting_wave_id register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:15: error: src_pops_exiting_wave_id register not available on this GPU

s_and_b64 s[0:1], s[0:1], src_shared_base
+// GFX11: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x8b]
// GFX12XX: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x8b]
-// NOSICI: :[[@LINE-2]]:27: error: src_shared_base register not available on this GPU
// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x86]
-// GFX11: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x8b]
+// NOSICI: :[[@LINE-4]]:27: error: src_shared_base register not available on this GPU
// NOVI: :[[@LINE-5]]:27: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:27: error: src_shared_base register not available on this GPU

s_and_b64 s[0:1], s[0:1], src_shared_limit
+// GFX11: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x8b]
// GFX12XX: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x8b]
-// NOSICI: :[[@LINE-2]]:27: error: src_shared_limit register not available on this GPU
// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x86]
-// GFX11: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x8b]
+// NOSICI: :[[@LINE-4]]:27: error: src_shared_limit register not available on this GPU
// NOVI: :[[@LINE-5]]:27: error: src_shared_limit register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:27: error: src_shared_limit register not available on this GPU

s_and_b64 s[0:1], s[0:1], src_private_base
+// GFX11: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x8b]
// GFX12XX: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x8b]
-// NOSICI: :[[@LINE-2]]:27: error: src_private_base register not available on this GPU
// GFX9: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x86]
-// GFX11: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x8b]
+// NOSICI: :[[@LINE-4]]:27: error: src_private_base register not available on this GPU
// NOVI: :[[@LINE-5]]:27: error: src_private_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:27: error: src_private_base register not available on this GPU

s_and_b64 s[0:1], s[0:1], src_private_limit
+// GFX11: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x8b]
// GFX12XX: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x8b]
-// NOSICI: :[[@LINE-2]]:27: error: src_private_limit register not available on this GPU
// GFX9: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x86]
-// GFX11: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x8b]
+// NOSICI: :[[@LINE-4]]:27: error: src_private_limit register not available on this GPU
// NOVI: :[[@LINE-5]]:27: error: src_private_limit register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:27: error: src_private_limit register not available on this GPU

s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id
-// NOSICI: :[[@LINE-1]]:27: error: src_pops_exiting_wave_id register not available on this GPU
// GFX9: s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id ; encoding: [0x00,0xef,0x80,0x86]
-// NOVI: :[[@LINE-3]]:27: error: src_pops_exiting_wave_id register not available on this GPU
-// NOGFX11: :[[@LINE-4]]:27: error: src_pops_exiting_wave_id register not available on this GPU
-// NOGFX12: :[[@LINE-5]]:27: error: src_pops_exiting_wave_id register not available on this GPU
-// NOGFX1250: :[[@LINE-6]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX11: :[[@LINE-2]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX12: :[[@LINE-3]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX1250: :[[@LINE-4]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// NOSICI: :[[@LINE-5]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// NOVI: :[[@LINE-6]]:27: error: src_pops_exiting_wave_id register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:27: error: src_pops_exiting_wave_id register not available on this GPU

v_add_u16 v0, src_shared_base, v0
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
// GFX9: v_add_u16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4c]
-// NOVI: :[[@LINE-3]]:15: error: src_shared_base register not available on this GPU
-// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-6]]:15: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
// GFX9: v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xeb,0x06,0x86,0x06]
-// NOVI: :[[@LINE-3]]:20: error: src_shared_base register not available on this GPU
-// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-6]]:20: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
// GFX9: v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xd6,0x01,0x4c,0x00,0x06,0x06,0x86]
-// NOVI: :[[@LINE-3]]:24: error: src_shared_base register not available on this GPU
-// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-6]]:24: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_add_u32 v0, src_shared_base, v0
+// GFX11: v_add_nc_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4a]
// GFX12XX: v_add_nc_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4a]
-// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// GFX9: v_add_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x68]
-// GFX11: v_add_nc_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4a]
+// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_add_u32_e64 v0, src_shared_base, v0
+// GFX11: v_add_nc_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xeb,0x00,0x02,0x00]
// GFX12XX: v_add_nc_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xeb,0x00,0x02,0x00]
-// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// GFX9: v_add_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xeb,0x00,0x02,0x00]
-// GFX11: v_add_nc_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xeb,0x00,0x02,0x00]
+// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
// NOVI: :[[@LINE-5]]:19: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_cmp_eq_i64 vcc, src_shared_base, v[0:1]
-// NOSICI: :[[@LINE-1]]:19: error: src_shared_base register not available on this GPU
// GFX9: v_cmp_eq_i64_e32 vcc, src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0xc4,0x7d]
-// NOVI: :[[@LINE-3]]:19: error: src_shared_base register not available on this GPU
-// NOGFX11: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
-// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
-// NOGFX1250: :[[@LINE-6]]:1: error: operands are not valid for this GPU or mode
+// NOGFX11: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+// NOGFX12: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOSICI: :[[@LINE-5]]:19: error: src_shared_base register not available on this GPU
+// NOVI: :[[@LINE-6]]:19: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:19: error: src_shared_base register not available on this GPU

v_max_f16 v0, src_shared_base, v0
+// GFX11: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x72]
// GFX12XX: v_max_num_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x62]
-// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// GFX9: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x5a]
-// GFX11: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x72]
+// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_max_f32 v0, src_shared_base, v0
+// GFX11: v_max_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x20]
// GFX12XX: v_max_num_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x2c]
-// NOSICI: :[[@LINE-2]]:15: error: src_shared_base register not available on this GPU
// GFX9: v_max_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x16]
-// GFX11: v_max_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x20]
+// NOSICI: :[[@LINE-4]]:15: error: src_shared_base register not available on this GPU
// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:15: error: src_shared_base register not available on this GPU

v_max_f64 v[0:1], src_shared_base, v[0:1]
+// GFX11: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x2a,0xd7,0xeb,0x00,0x02,0x00]
// GFX12XX: v_max_num_f64_e32 v[0:1], src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0x00,0x1c]
-// NOSICI: :[[@LINE-2]]:19: error: src_shared_base register not available on this GPU
// GFX9: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xeb,0x00,0x02,0x00]
-// GFX11: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x2a,0xd7,0xeb,0x00,0x02,0x00]
+// NOSICI: :[[@LINE-4]]:19: error: src_shared_base register not available on this GPU
// NOVI: :[[@LINE-5]]:19: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:19: error: src_shared_base register not available on this GPU

v_pk_add_f16 v0, src_shared_base, v0
+// GFX11: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x0f,0xcc,0xeb,0x00,0x02,0x18]
// GFX12XX: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x0f,0xcc,0xeb,0x00,0x02,0x18]
-// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xeb,0x00,0x02,0x18]
-// GFX11: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x0f,0xcc,0xeb,0x00,0x02,0x18]
+// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
// NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_ceil_f16 v0, neg(src_shared_base)
+// GFX11: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
// GFX12XX: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
-// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// GFX9: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20]
-// GFX11: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
+// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
// NOVI: :[[@LINE-5]]:20: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_ceil_f16 v0, abs(src_shared_base)
+// GFX11: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
// GFX12XX: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
-// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// GFX9: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x85,0xd1,0xeb,0x00,0x00,0x00]
-// GFX11: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
// NOVI: :[[@LINE-5]]:20: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_ceil_f64 v[5:6], |src_shared_base|
-// GFX9: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x58,0xd1,0xeb,0x00,0x00,0x00]
// GFX11: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x98,0xd5,0xeb,0x00,0x00,0x00]
// GFX12: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x98,0xd5,0xeb,0x00,0x00,0x00]
-// NOSI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOCI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU
-// NOVI: :[[@LINE-6]]:21: error: src_shared_base register not available on this GPU
-// NOGFX1250: :[[@LINE-7]]:12: error: invalid operand for instruction
+// GFX9: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x58,0xd1,0xeb,0x00,0x00,0x00]
+// NOCI: :[[@LINE-4]]:21: error: src_shared_base register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:12: error: invalid operand for instruction
+// NOSI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-7]]:21: error: src_shared_base register not available on this GPU
// NOCIVI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU

v_ceil_f64 v[5:6], -src_shared_base
-// GFX9: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x58,0xd1,0xeb,0x00,0x00,0x20]
// GFX11: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x98,0xd5,0xeb,0x00,0x00,0x20]
// GFX12: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x98,0xd5,0xeb,0x00,0x00,0x20]
-// NOSI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOCI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU
-// NOVI: :[[@LINE-6]]:21: error: src_shared_base register not available on this GPU
-// NOGFX1250: :[[@LINE-7]]:12: error: invalid operand for instruction
+// GFX9: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x58,0xd1,0xeb,0x00,0x00,0x20]
+// NOCI: :[[@LINE-4]]:21: error: src_shared_base register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:12: error: invalid operand for instruction
+// NOSI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-7]]:21: error: src_shared_base register not available on this GPU
// NOCIVI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU

v_ceil_f32 v0, -src_shared_base
+// GFX11: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xa2,0xd5,0xeb,0x00,0x00,0x20]
// GFX12XX: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xa2,0xd5,0xeb,0x00,0x00,0x20]
-// NOSICI: :[[@LINE-2]]:17: error: src_shared_base register not available on this GPU
// GFX9: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x5d,0xd1,0xeb,0x00,0x00,0x20]
-// GFX11: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xa2,0xd5,0xeb,0x00,0x00,0x20]
+// NOSICI: :[[@LINE-4]]:17: error: src_shared_base register not available on this GPU
// NOVI: :[[@LINE-5]]:17: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU

v_ceil_f32 v0, |src_shared_base|
+// GFX11: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xa2,0xd5,0xeb,0x00,0x00,0x00]
// GFX12XX: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xa2,0xd5,0xeb,0x00,0x00,0x00]
-// NOSICI: :[[@LINE-2]]:17: error: src_shared_base register not available on this GPU
// GFX9: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x5d,0xd1,0xeb,0x00,0x00,0x00]
-// GFX11: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xa2,0xd5,0xeb,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-4]]:17: error: src_shared_base register not available on this GPU
// NOVI: :[[@LINE-5]]:17: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU

v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
// GFX9: v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0xa6,0x00]
-// NOVI: :[[@LINE-3]]:22: error: src_shared_base register not available on this GPU
-// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX11: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-3]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-6]]:22: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
// GFX9: v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0x96,0x00]
-// NOVI: :[[@LINE-3]]:22: error: src_shared_base register not available on this GPU
-// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX11: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-3]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-6]]:22: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD src0_sel:DWORD
-// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
// GFX9: v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0x86,0x00]
-// NOVI: :[[@LINE-3]]:21: error: src_shared_base register not available on this GPU
-// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX11: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-3]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOSICI: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOVI: :[[@LINE-6]]:21: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported

v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD
-// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
// GFX9: v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0xa6,0x00]
-// NOVI: :[[@LINE-3]]:22: error: src_shared_base register not available on this GPU
-// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
-// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX11: :[[@LINE-2]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-3]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOSICI: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOVI: :[[@LINE-6]]:22: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported

//---------------------------------------------------------------------------//
@@ -1584,206 +1584,206 @@ v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD
//---------------------------------------------------------------------------//

v_add_u32 v0, private_base, s0
-// GFX12XX: v_add_nc_u32_e64 v0, src_private_base, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xed,0x00,0x00,0x00]
-// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// GFX11: v_add_nc_u32_e64 v0, src_private_base, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xed,0x00,0x00,0x00]
-// NOVI: :[[@LINE-4]]:15: error: src_private_base register not available on this GPU
-// NOGFX9: :[[@LINE-5]]:29: error: invalid operand (violates constant bus restrictions)
+// GFX12XX: v_add_nc_u32_e64 v0, src_private_base, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xed,0x00,0x00,0x00]
+// NOGFX9: :[[@LINE-3]]:29: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-5]]:15: error: src_private_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_add_u32 v0, scc, s0
-// GFX12XX: v_add_nc_u32_e64 v0, src_scc, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x00,0x00]
-// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// GFX11: v_add_nc_u32_e64 v0, src_scc, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x00,0x00]
-// NOVI: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
-// NOGFX9: :[[@LINE-5]]:20: error: invalid operand (violates constant bus restrictions)
+// GFX12XX: v_add_nc_u32_e64 v0, src_scc, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x00,0x00]
+// NOGFX9: :[[@LINE-3]]:20: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

// v_div_fmas implicitly reads VCC
v_div_fmas_f32 v0, shared_base, v0, v1
-// GFX12XX: v_div_fmas_f32 v0, src_shared_base, v0, v1 ; encoding: [0x00,0x00,0x37,0xd6,0xeb,0x00,0x06,0x04]
-// NOSICI: :[[@LINE-2]]:20: error: src_shared_base register not available on this GPU
// GFX11: v_div_fmas_f32 v0, src_shared_base, v0, v1 ; encoding: [0x00,0x00,0x37,0xd6,0xeb,0x00,0x06,0x04]
-// NOVI: :[[@LINE-4]]:20: error: src_shared_base register not available on this GPU
-// NOGFX9: :[[@LINE-5]]:20: error: invalid operand (violates constant bus restrictions)
+// GFX12XX: v_div_fmas_f32 v0, src_shared_base, v0, v1 ; encoding: [0x00,0x00,0x37,0xd6,0xeb,0x00,0x06,0x04]
+// NOGFX9: :[[@LINE-3]]:20: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-4]]:20: error: src_shared_base register not available on this GPU
+// NOVI: :[[@LINE-5]]:20: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:20: error: src_shared_base register not available on this GPU

// v_div_fmas implicitly reads VCC
v_div_fmas_f32 v0, v0, shared_limit, v1
-// GFX12XX: v_div_fmas_f32 v0, v0, src_shared_limit, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xd9,0x05,0x04]
-// NOSICI: :[[@LINE-2]]:24: error: src_shared_limit register not available on this GPU
// GFX11: v_div_fmas_f32 v0, v0, src_shared_limit, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xd9,0x05,0x04]
-// NOVI: :[[@LINE-4]]:24: error: src_shared_limit register not available on this GPU
-// NOGFX9: :[[@LINE-5]]:24: error: invalid operand (violates constant bus restrictions)
+// GFX12XX: v_div_fmas_f32 v0, v0, src_shared_limit, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xd9,0x05,0x04]
+// NOGFX9: :[[@LINE-3]]:24: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-4]]:24: error: src_shared_limit register not available on this GPU
+// NOVI: :[[@LINE-5]]:24: error: src_shared_limit register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:24: error: src_shared_limit register not available on this GPU

// v_div_fmas implicitly reads VCC
v_div_fmas_f32 v0, v0, v1, private_limit
-// GFX12XX: v_div_fmas_f32 v0, v0, v1, src_private_limit ; encoding: [0x00,0x00,0x37,0xd6,0x00,0x03,0xba,0x03]
-// NOSICI: :[[@LINE-2]]:28: error: src_private_limit register not available on this GPU
// GFX11: v_div_fmas_f32 v0, v0, v1, src_private_limit ; encoding: [0x00,0x00,0x37,0xd6,0x00,0x03,0xba,0x03]
-// NOVI: :[[@LINE-4]]:28: error: src_private_limit register not available on this GPU
-// NOGFX9: :[[@LINE-5]]:28: error: invalid operand (violates constant bus restrictions)
+// GFX12XX: v_div_fmas_f32 v0, v0, v1, src_private_limit ; encoding: [0x00,0x00,0x37,0xd6,0x00,0x03,0xba,0x03]
+// NOGFX9: :[[@LINE-3]]:28: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-4]]:28: error: src_private_limit register not available on this GPU
+// NOVI: :[[@LINE-5]]:28: error: src_private_limit register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:28: error: src_private_limit register not available on this GPU

// v_div_fmas implicitly reads VCC
v_div_fmas_f32 v0, execz, v0, v1
-// NOSICI: :[[@LINE-1]]:20: error: invalid operand (violates constant bus restrictions)
-// NOGFX89: :[[@LINE-2]]:20: error: invalid operand (violates constant bus restrictions)
-// NOGFX11: :[[@LINE-3]]:20: error: src_execz register not available on this GPU
-// NOGFX12: :[[@LINE-4]]:20: error: src_execz register not available on this GPU
-// NOGFX1250: :[[@LINE-5]]:20: error: src_execz register not available on this GPU
+// NOGFX11: :[[@LINE-1]]:20: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-2]]:20: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-3]]:20: error: src_execz register not available on this GPU
+// NOGFX89: :[[@LINE-4]]:20: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-5]]:20: error: invalid operand (violates constant bus restrictions)
// NOSICIVI: :[[@LINE-1]]:20: error: invalid operand (violates constant bus restrictions)

// v_div_fmas implicitly reads VCC
v_div_fmas_f32 v0, v0, scc, v1
+// GFX11: v_div_fmas_f32 v0, v0, src_scc, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xfb,0x05,0x04]
// GFX12XX: v_div_fmas_f32 v0, v0, src_scc, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xfb,0x05,0x04]
-// NOSICI: :[[@LINE-2]]:24: error: invalid operand (violates constant bus restrictions)
// NOGFX89: :[[@LINE-3]]:24: error: invalid operand (violates constant bus restrictions)
-// GFX11: v_div_fmas_f32 v0, v0, src_scc, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xfb,0x05,0x04]
+// NOSICI: :[[@LINE-4]]:24: error: invalid operand (violates constant bus restrictions)
// NOSICIVI: :[[@LINE-1]]:24: error: invalid operand (violates constant bus restrictions)

// v_div_fmas implicitly reads VCC
v_div_fmas_f32 v0, v0, v1, vccz
-// NOSICI: :[[@LINE-1]]:28: error: invalid operand (violates constant bus restrictions)
-// NOGFX89: :[[@LINE-2]]:28: error: invalid operand (violates constant bus restrictions)
-// NOGFX11: :[[@LINE-3]]:28: error: src_vccz register not available on this GPU
-// NOGFX12: :[[@LINE-4]]:28: error: src_vccz register not available on this GPU
-// NOGFX1250: :[[@LINE-5]]:28: error: src_vccz register not available on this GPU
+// NOGFX11: :[[@LINE-1]]:28: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-2]]:28: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-3]]:28: error: src_vccz register not available on this GPU
+// NOGFX89: :[[@LINE-4]]:28: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-5]]:28: error: invalid operand (violates constant bus restrictions)
// NOSICIVI: :[[@LINE-1]]:28: error: invalid operand (violates constant bus restrictions)

// v_addc_co_u32 implicitly reads VCC (VOP2)
v_addc_co_u32 v0, vcc, shared_base, v0, vcc
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
-// NOGFX9: :[[@LINE-3]]:24: error: invalid operand (violates constant bus restrictions)
-// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX9: :[[@LINE-4]]:24: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_madak_f32 v0, shared_base, v0, 0x11213141
-// NOSICI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU
-// NOVI: :[[@LINE-2]]:17: error: src_shared_base register not available on this GPU
-// NOGFX9: :[[@LINE-3]]:17: error: invalid operand (violates constant bus restrictions)
-// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX9: :[[@LINE-4]]:17: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-5]]:17: error: src_shared_base register not available on this GPU
+// NOVI: :[[@LINE-6]]:17: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU

v_madak_f32 v0, scc, v0, 0x11213141
-// NOSICI: :[[@LINE-1]]:17: error: invalid operand (violates constant bus restrictions)
-// NOGFX89: :[[@LINE-2]]:17: error: invalid operand (violates constant bus restrictions)
-// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-4]]:17: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-5]]:17: error: invalid operand (violates constant bus restrictions)
// NOSICIVI: :[[@LINE-1]]:17: error: invalid operand (violates constant bus restrictions)

v_madak_f32 v0, 0xff32ff, v0, 0x11213141
-// NOSICI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed
-// NOGFX89: :[[@LINE-2]]:31: error: only one unique literal operand is allowed
-// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-4]]:31: error: only one unique literal operand is allowed
+// NOSICI: :[[@LINE-5]]:31: error: only one unique literal operand is allowed
// NOSICIVI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed

v_madak_f32 v0, 0xff32ff, v0, 1
-// NOSICI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed
-// NOGFX89: :[[@LINE-2]]:31: error: only one unique literal operand is allowed
-// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-4]]:31: error: only one unique literal operand is allowed
+// NOSICI: :[[@LINE-5]]:31: error: only one unique literal operand is allowed
// NOSICIVI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed

v_madmk_f32 v0, 0xff32ff, 0x11213141, v0
-// NOSICI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed
-// NOGFX89: :[[@LINE-2]]:27: error: only one unique literal operand is allowed
-// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-4]]:27: error: only one unique literal operand is allowed
+// NOSICI: :[[@LINE-5]]:27: error: only one unique literal operand is allowed
// NOSICIVI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed

v_madmk_f32 v0, 0xff32ff, -1, v0
-// NOSICI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed
-// NOGFX89: :[[@LINE-2]]:27: error: only one unique literal operand is allowed
-// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-4]]:27: error: only one unique literal operand is allowed
+// NOSICI: :[[@LINE-5]]:27: error: only one unique literal operand is allowed
// NOSICIVI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed

v_madak_f16 v0, 0xff32, v0, 0x1122
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE-2]]:29: error: only one unique literal operand is allowed
-// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-4]]:29: error: only one unique literal operand is allowed
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_madak_f16 v0, 0xff32, v0, 0
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE-2]]:29: error: only one unique literal operand is allowed
-// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-4]]:29: error: only one unique literal operand is allowed
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_madmk_f16 v0, 0xff32, 0x1122, v0
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE-2]]:25: error: only one unique literal operand is allowed
-// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-4]]:25: error: only one unique literal operand is allowed
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_madmk_f16 v0, 0xff32, 1, v0
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE-2]]:25: error: only one unique literal operand is allowed
-// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-4]]:25: error: only one unique literal operand is allowed
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_cmp_eq_f32 s[0:1], private_base, private_limit
-// NOSICI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU
-// NOVI: :[[@LINE-2]]:22: error: src_private_base register not available on this GPU
-// NOGFX9: :[[@LINE-3]]:36: error: invalid operand (violates constant bus restrictions)
-// NOGFX11: :[[@LINE-4]]:14: error: invalid operand for instruction
-// NOGFX12: :[[@LINE-5]]:14: error: invalid operand for instruction
-// NOGFX1250: :[[@LINE-6]]:14: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-1]]:14: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-2]]:14: error: invalid operand for instruction
+// NOGFX1250: :[[@LINE-3]]:14: error: invalid operand for instruction
+// NOGFX9: :[[@LINE-4]]:36: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-5]]:22: error: src_private_base register not available on this GPU
+// NOVI: :[[@LINE-6]]:22: error: src_private_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU

v_cmp_eq_f32 s[0:1], private_base, s0
-// NOSICI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU
-// NOVI: :[[@LINE-2]]:22: error: src_private_base register not available on this GPU
-// NOGFX9: :[[@LINE-3]]:36: error: invalid operand (violates constant bus restrictions)
-// NOGFX11: :[[@LINE-4]]:14: error: invalid operand for instruction
-// NOGFX12: :[[@LINE-5]]:14: error: invalid operand for instruction
-// NOGFX1250: :[[@LINE-6]]:14: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-1]]:14: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-2]]:14: error: invalid operand for instruction
+// NOGFX1250: :[[@LINE-3]]:14: error: invalid operand for instruction
+// NOGFX9: :[[@LINE-4]]:36: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-5]]:22: error: src_private_base register not available on this GPU
+// NOVI: :[[@LINE-6]]:22: error: src_private_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU

v_cmp_eq_f32 s[0:1], execz, s0
-// NOSICI: :[[@LINE-1]]:29: error: invalid operand (violates constant bus restrictions)
-// NOGFX89: :[[@LINE-2]]:29: error: invalid operand (violates constant bus restrictions)
-// NOGFX11: :[[@LINE-3]]:22: error: src_execz register not available on this GPU
-// NOGFX12: :[[@LINE-4]]:22: error: src_execz register not available on this GPU
-// NOGFX1250: :[[@LINE-5]]:22: error: src_execz register not available on this GPU
+// NOGFX11: :[[@LINE-1]]:22: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-2]]:22: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-3]]:22: error: src_execz register not available on this GPU
+// NOGFX89: :[[@LINE-4]]:29: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-5]]:29: error: invalid operand (violates constant bus restrictions)
// NOSICIVI: :[[@LINE-1]]:29: error: invalid operand (violates constant bus restrictions)

v_pk_add_f16 v255, private_base, private_limit
-// GFX12XX: v_pk_add_f16 v255, src_private_base, src_private_limit ; encoding: [0xff,0x40,0x0f,0xcc,0xed,0xdc,0x01,0x18]
-// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// GFX11: v_pk_add_f16 v255, src_private_base, src_private_limit ; encoding: [0xff,0x40,0x0f,0xcc,0xed,0xdc,0x01,0x18]
-// NOVI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOGFX9: :[[@LINE-5]]:34: error: invalid operand (violates constant bus restrictions)
+// GFX12XX: v_pk_add_f16 v255, src_private_base, src_private_limit ; encoding: [0xff,0x40,0x0f,0xcc,0xed,0xdc,0x01,0x18]
+// NOGFX9: :[[@LINE-3]]:34: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

v_pk_add_f16 v255, vccz, execz
-// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
-// NOGFX9: :[[@LINE-3]]:26: error: invalid operand (violates constant bus restrictions)
-// NOGFX11: :[[@LINE-4]]:20: error: src_vccz register not available on this GPU
-// NOGFX12: :[[@LINE-5]]:20: error: src_vccz register not available on this GPU
-// NOGFX1250: :[[@LINE-6]]:20: error: src_vccz register not available on this GPU
+// NOGFX11: :[[@LINE-1]]:20: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-2]]:20: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-3]]:20: error: src_vccz register not available on this GPU
+// NOGFX9: :[[@LINE-4]]:26: error: invalid operand (violates constant bus restrictions)
+// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU

//---------------------------------------------------------------------------//
@@ -1791,36 +1791,36 @@ v_pk_add_f16 v255, vccz, execz
//---------------------------------------------------------------------------//

v_sqrt_f32 v2, lit(123)
-// SICI: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX89: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
// GFX11: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX89: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// SICI: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]

v_sqrt_f32 v2, abs(lit(123))
-// SICI: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX89: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
// GFX11: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX89: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// SICI: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]

v_sqrt_f32 v2, lit(123.0)
-// SICI: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
-// GFX89: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x4e,0x04,0x7e,0x00,0x00,0xf6,0x42]
-// GFX12XX: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
// GFX11: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX12XX: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX89: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x4e,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// SICI: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]

v_sqrt_f64 v[2:3], lit(123.0)
-// SICI: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
-// GFX89: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x50,0x04,0x7e,0x00,0xc0,0x5e,0x40]
// GFX11: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
// GFX12: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
// GFX1250: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xfe,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40,0x00,0x00,0x00,0x00]
+// GFX89: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x50,0x04,0x7e,0x00,0xc0,0x5e,0x40]
+// SICI: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]

v_sqrt_f64 v[2:3], lit(123)
-// SICI: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX89: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x50,0x04,0x7e,0x7b,0x00,0x00,0x00]
// GFX11: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
// GFX12: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
// GFX1250: v_sqrt_f64_e32 v[2:3], lit(0x7b) ;
encoding: [0xfe,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +// GFX89: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x50,0x04,0x7e,0x7b,0x00,0x00,0x00] +// SICI: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00] v_sqrt_f32 v2, lit 123.0 // NOGCN: :[[@LINE-1]]:20: error: expected left paren after lit @@ -1834,16 +1834,16 @@ v_sqrt_f32 v2, lit(v1) // Make sure lit() is accepted on operands without modifiers. v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8) -// SICI: v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8) ; encoding: [0xff,0x10,0x08,0x42,0xe8,0x07,0x00,0x00] // GFX89: v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8) ; encoding: [0xff,0x10,0x08,0x30,0xe8,0x07,0x00,0x00] -// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// SICI: v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8) ; encoding: [0xff,0x10,0x08,0x42,0xe8,0x07,0x00,0x00] v_madak_f32 v4, lit(lit(0x7e8)), v8, lit(0x7e8) -// NOSICI: :[[@LINE-1]]:24: error: not a valid operand. -// NOGFX89: :[[@LINE-2]]:24: error: not a valid operand. -// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU -// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU -// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-4]]:24: error: not a valid operand. +// NOSICI: :[[@LINE-5]]:24: error: not a valid operand. // NOSICIVI: :[[@LINE-1]]:24: error: not a valid operand. 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt index 29bfa54..7af0bfe5 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt @@ -237,64 +237,76 @@ # GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04 -# GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] +# GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] 0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04 -# GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] +# GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02 -# GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] +# GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] 0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00 -# GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] +# GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] 0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00 -# GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +# GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04 +# GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04] + +0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04 +# GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04] 0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04 -# GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] +# GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] 0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04 -# GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] +# GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02 -# GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] +# GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] 0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00 -# GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] +# GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] 0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00 -# GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +# GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] 0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04 -# GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] +# GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] 0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04 -# GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] +# GFX1250: 
v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02 -# GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] +# GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] 0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00 -# GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] +# GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] 0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00 -# GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +# GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04 +# GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04] 0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04 -# GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] +# GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] 0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04 -# GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] +# GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02 -# GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] +# GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] 0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00 -# GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] +# GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] 0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00 -# GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +# GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04 +# GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04] 0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf # GFX1250: v_cvt_pk_bf16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/RISCV/xsfvfexp.s b/llvm/test/MC/RISCV/xsfvfexp.s new file mode 100644 index 0000000..bd6aecd --- /dev/null +++ b/llvm/test/MC/RISCV/xsfvfexp.s @@ -0,0 +1,29 @@ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+xsfvfexp32e %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+xsfvfexp32e %s \ +# RUN: | llvm-objdump -d --mattr=+xsfvfexp32e - \ +# RUN: | FileCheck %s --check-prefix=CHECK-INST +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+xsfvfexp32e %s \ +# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+xsfvfexp16e %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+xsfvfexp16e %s \ +# RUN: | llvm-objdump 
-d --mattr=+xsfvfexp16e - \ +# RUN: | FileCheck %s --check-prefix=CHECK-INST +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+xsfvfexp16e %s \ +# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+zvfbfmin,+xsfvfbfexp16e %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+zvfbfmin,+xsfvfbfexp16e %s \ +# RUN: | llvm-objdump -d --mattr=+xsfvfbfexp16e - \ +# RUN: | FileCheck %s --check-prefix=CHECK-INST +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+zvfbfmin,+xsfvfbfexp16e %s \ +# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN + +sf.vfexp.v v2, v5, v0.t +# CHECK-INST: sf.vfexp.v v2, v5, v0.t +# CHECK-ENCODING: [0x57,0x91,0x53,0x4c] +# CHECK-ERROR: instruction requires the following: 'Xsfvfbfexp16e', 'Xsfvfexp16e', or 'Xsfvfexp32e' (SiFive Vector Floating-Point Exponential Function Instruction){{$}} +# CHECK-UNKNOWN: 4c539157 <unknown> diff --git a/llvm/test/MC/RISCV/xsfvfexpa.s b/llvm/test/MC/RISCV/xsfvfexpa.s new file mode 100644 index 0000000..317a103 --- /dev/null +++ b/llvm/test/MC/RISCV/xsfvfexpa.s @@ -0,0 +1,15 @@ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+xsfvfexpa %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+xsfvfexpa %s \ +# RUN: | llvm-objdump -d --mattr=+xsfvfexpa - \ +# RUN: | FileCheck %s --check-prefix=CHECK-INST +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+xsfvfexpa %s \ +# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN + +sf.vfexpa.v v2, v5, v0.t +# CHECK-INST: sf.vfexpa.v v2, v5, v0.t +# CHECK-ENCODING: [0x57,0x11,0x53,0x4c] +# CHECK-ERROR: instruction requires the following: 'Xsfvfexpa' (SiFive Vector Floating-Point Exponential Approximation Instruction){{$}} +# CHECK-UNKNOWN: 4c531157 <unknown> diff --git a/llvm/test/ThinLTO/AArch64/aarch64_inline.ll b/llvm/test/ThinLTO/AArch64/aarch64_inline.ll new file mode 100644 index 0000000..401f66d --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/aarch64_inline.ll @@ -0,0 +1,86 @@ +;; Test verifies that inlining happens cross-module when module flags are upgraded. +;; `foo` and `main` both use the old semantics, while `bar` uses the new semantics.
+;; Regression test for #82763 + +; RUN: split-file %s %t +; RUN: opt -module-summary %t/foo.ll -o %t/foo.o +; RUN: opt -module-summary %t/bar.ll -o %t/bar.o +; RUN: opt -module-summary %t/main.ll -o %t/main.o +; RUN: llvm-lto2 run %t/main.o %t/foo.o %t/bar.o -save-temps \ +; RUN: -o %t/t.exe \ +; RUN: -r=%t/foo.o,foo,plx \ +; RUN: -r=%t/bar.o,bar,plx \ +; RUN: -r=%t/main.o,foo,l \ +; RUN: -r=%t/main.o,bar,l \ +; RUN: -r=%t/main.o,main,plx 2>&1 +; RUN: llvm-dis %t/t.exe.1.4.opt.bc -o - | FileCheck %s + +; CHECK: define dso_local noundef i32 @main() local_unnamed_addr #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 35 +; CHECK-NEXT: } + +; CHECK: attributes #0 = { {{.*}}"branch-target-enforcement" "sign-return-address"="all" "sign-return-address-key"="b_key" } + +; CHECK: !llvm.module.flags = !{!0, !1, !2, !3} + +; CHECK: !0 = !{i32 8, !"branch-target-enforcement", i32 2} +; CHECK: !1 = !{i32 8, !"sign-return-address", i32 2} +; CHECK: !2 = !{i32 8, !"sign-return-address-all", i32 2} +; CHECK: !3 = !{i32 8, !"sign-return-address-with-bkey", i32 2} + + +;--- foo.ll +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define dso_local noundef i32 @foo() local_unnamed_addr #0 { +entry: + ret i32 34 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } +!llvm.module.flags = !{!0, !1, !2, !3} +!0 = !{i32 8, !"branch-target-enforcement", i32 1} +!1 = !{i32 8, !"sign-return-address", i32 1} +!2 = !{i32 8, !"sign-return-address-all", i32 1} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 1} + +;--- bar.ll +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define dso_local noundef i32 @bar() local_unnamed_addr #0 { +entry: + ret i32 1 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "branch-target-enforcement" "sign-return-address"="all" "sign-return-address-key"="b_key" } +!llvm.module.flags = !{!0, !1, !2, !3} +!0 = !{i32 8, !"branch-target-enforcement", i32 2} +!1 = !{i32 8, !"sign-return-address", i32 2} +!2 = !{i32 8, !"sign-return-address-all", i32 2} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 2} + +;--- main.ll +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +declare i32 @foo() +declare i32 @bar() + +define i32 @main() #0 { +entry: + %1 = call i32 @foo() + %2 = call i32 @bar() + %3 = add i32 %1, %2 + ret i32 %3 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } + +!llvm.module.flags = !{!0, !1, !2, !3} +!0 = !{i32 8, !"branch-target-enforcement", i32 1} +!1 = !{i32 8, !"sign-return-address", i32 1} +!2 = !{i32 8, !"sign-return-address-all", i32 1} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 1} diff --git a/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll new file mode 100644 index 0000000..ae3c746 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll @@ -0,0 +1,338 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=dse -S %s | FileCheck %s + +define void @dead_unstrided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_unstrided_store_non_matrix_load( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias
[[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = load double, ptr [[SRC]], align 8 +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + %l = load double, ptr %src + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + ret void +} + +define void @live_unstrided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @live_unstrided_store_non_matrix_load( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_1:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = load double, ptr [[DST]], align 8 +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L_1]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + %l.2 = load double, ptr %dst + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l.1, ptr %dst, i32 4, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_strided_store(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_strided_store( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 200, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 100, i1 false, i32 4, i32 2) + ret void +} + +define void @live_strided_store(ptr %ptr) { +; CHECK-LABEL: define void @live_strided_store( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[PTR]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[PTR]], i32 200, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[PTR]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %ptr, i32 100, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %ptr, 
i32 200, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %ptr, i32 100, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_strided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_strided_store_non_matrix_load( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_1:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = load double, ptr [[SRC]], align 8 +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L_1]], ptr [[DST]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2) + %l.2 = load double, ptr %src + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l.1, ptr %dst, i32 100, i1 false, i32 4, i32 2) + ret void +} + +define void @live_strided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @live_strided_store_non_matrix_load( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_1:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = load double, ptr [[DST]], align 8 +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L_1]], ptr [[DST]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2) + %l.2 = load double, ptr %dst + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l.1, ptr %dst, i32 100, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_dynamically_strided_store(ptr noalias %src, ptr noalias %dst, i32 %stride) { +; CHECK-LABEL: define void @dead_dynamically_strided_store( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[STRIDE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) 
+ ret void +} + +define void @live_dynamically_strided_store(ptr %ptr, i32 %stride) { +; CHECK-LABEL: define void @live_dynamically_strided_store( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[STRIDE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[PTR]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[PTR]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[PTR]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %ptr, i32 %stride, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %ptr, i32 %stride, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %ptr, i32 %stride, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_dynamically_strided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst, i32 %stride) { +; CHECK-LABEL: define void @dead_dynamically_strided_store_non_matrix_load( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[STRIDE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_1:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = load double, ptr [[SRC]], align 8 +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L_1]], ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + %l.2 = load double, ptr %src + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l.1, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + ret void +} + +define void @live_dynamically_strided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst, i32 %stride) { +; CHECK-LABEL: define void @live_dynamically_strided_store_non_matrix_load( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[STRIDE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_1:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = load double, ptr [[DST]], align 8 +; CHECK-NEXT: ret void +; +entry: + %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + %l.2 = load double, ptr %dst + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_unstrided_store(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_unstrided_store( +; CHECK-SAME: ptr 
noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2) + ret void +} + +define void @live_unstrided_store(ptr %ptr) { +; CHECK-LABEL: define void @live_unstrided_store( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[PTR]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[PTR]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[PTR]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %ptr, i32 4, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %ptr, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %ptr, i32 4, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_non_matrix_store(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_non_matrix_store( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[DST_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 6 +; CHECK-NEXT: store double 4.200000e+01, ptr [[DST_OFFSET]], align 8 +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %dst.offset = getelementptr inbounds double, ptr %src, i32 6 + store double 42.0, ptr %dst.offset + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2) + ret void +} + +define void @live_non_matrix_store(ptr %ptr) { +; CHECK-LABEL: define void @live_non_matrix_store( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PTR_OFFSET:%.*]] = getelementptr inbounds double, ptr [[PTR]], i32 6 +; CHECK-NEXT: store double 4.200000e+01, ptr [[PTR_OFFSET]], align 8 +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[PTR]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[PTR]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %ptr.offset = getelementptr inbounds double, ptr %ptr, i32 6 + store double 42.0, ptr 
%ptr.offset + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %ptr, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %ptr, i32 4, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_matrix_store_non_matrix_overwrite_unstrided(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_matrix_store_non_matrix_overwrite_unstrided( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: store <8 x double> zeroinitializer, ptr [[DST]], align 64 +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + store <8 x double> zeroinitializer, ptr %dst + ret void +} + +define void @dead_matrix_store_non_matrix_overwrite_strided(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_matrix_store_non_matrix_overwrite_strided( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: store <16 x double> zeroinitializer, ptr [[DST]], align 128 +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 8, i1 false, i32 4, i32 2) + store <16 x double> zeroinitializer, ptr %dst + ret void +} + +define void @live_matrix_store_non_matrix_overwrite_unstrided(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @live_matrix_store_non_matrix_overwrite_unstrided( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[DST]], align 32 +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + store <4 x double> zeroinitializer, ptr %dst + ret void +} + +define void @live_matrix_store_non_matrix_overwrite_strided(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @live_matrix_store_non_matrix_overwrite_strided( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> 
@llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: store <8 x double> zeroinitializer, ptr [[DST]], align 64 +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 8, i1 false, i32 4, i32 2) + store <8 x double> zeroinitializer, ptr %dst + ret void +} + +define void @dead_matrix_store_dimension_change(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_matrix_store_dimension_change( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v9f64.i32(<9 x double> zeroinitializer, ptr [[DST]], i32 3, i1 false, i32 3, i32 3) +; CHECK-NEXT: ret void +; +entry: + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 8, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v9f64.i32(<9 x double> zeroinitializer, ptr %dst, i32 3, i1 false, i32 3, i32 3) + ret void +} + +define void @live_matrix_store_dimension_change(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @live_matrix_store_dimension_change( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v9f64.i32(<9 x double> zeroinitializer, ptr [[DST]], i32 3, i1 false, i32 3, i32 3) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 8, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v9f64.i32(<9 x double> zeroinitializer, ptr %dst, i32 3, i1 false, i32 3, i32 3) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2) + ret void +} + +declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32) +declare <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr, i32, i1, i32, i32) +declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32) diff --git a/llvm/test/Transforms/GVN/matrix-intrinsics.ll b/llvm/test/Transforms/GVN/matrix-intrinsics.ll new file mode 100644 index 0000000..78dbfe1 --- /dev/null +++ b/llvm/test/Transforms/GVN/matrix-intrinsics.ll @@ -0,0 +1,136 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=gvn -S %s | FileCheck %s + +define void @redundant_unstrided_load(ptr %src) { +; CHECK-LABEL: define void @redundant_unstrided_load( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 8 +; CHECK-NEXT: [[L:%.*]] = call <8 x double> 
@llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @use(<8 x double> [[L]]) +; CHECK-NEXT: call void @use(<8 x double> [[L_2]]) +; CHECK-NEXT: ret void +; +entry: + %src.offset = getelementptr inbounds double, ptr %src, i32 8 + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64(<8 x double> %l, ptr %src, i32 4, i1 false, i32 4, i32 2) + %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2) + call void @use(<8 x double> %l) + call void @use(<8 x double> %l.2) + ret void +} + +define void @redundant_unstrided_load_non_matrix_store(ptr %src) { +; CHECK-LABEL: define void @redundant_unstrided_load_non_matrix_store( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 1 +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: store double 4.200000e+01, ptr [[SRC]], align 8 +; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @use(<8 x double> [[L]]) +; CHECK-NEXT: call void @use(<8 x double> [[L_2]]) +; CHECK-NEXT: ret void +; +entry: + %src.offset = getelementptr inbounds double, ptr %src, i32 1 + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2) + store double 42.0, ptr %src + %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2) + call void @use(<8 x double> %l) + call void @use(<8 x double> %l.2) + ret void +} + +define void @redundant_strided_load(ptr %src) { +; CHECK-LABEL: define void @redundant_strided_load( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 16 +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[SRC]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @use(<8 x double> [[L]]) +; CHECK-NEXT: call void @use(<8 x double> [[L_2]]) +; CHECK-NEXT: ret void +; +entry: + %src.offset = getelementptr inbounds double, ptr %src, i32 16 + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) + %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) + call void @use(<8 x double> %l) + call void @use(<8 x double> %l.2) + ret void + +} + +define void @redundant_strided_load_non_matrix_store(ptr %src) { +; CHECK-LABEL: define void 
@redundant_strided_load_non_matrix_store( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 16 +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: store double 4.200000e+01, ptr [[SRC]], align 8 +; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @use(<8 x double> [[L]]) +; CHECK-NEXT: call void @use(<8 x double> [[L_2]]) +; CHECK-NEXT: ret void +; +entry: + %src.offset = getelementptr inbounds double, ptr %src, i32 16 + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) + store double 42.0, ptr %src + %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) + call void @use(<8 x double> %l) + call void @use(<8 x double> %l.2) + ret void +} + +define void @repeat_load_dimension_change_project(ptr %src) { +; CHECK-LABEL: define void @repeat_load_dimension_change_project( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = call <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr [[SRC]], i32 3, i1 false, i32 3, i32 3) +; CHECK-NEXT: [[L_3:%.*]] = shufflevector <9 x double> [[L_2]], <9 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: call void @use(<8 x double> [[L]]) +; CHECK-NEXT: call void @use(<8 x double> [[L_3]]) +; CHECK-NEXT: ret void +; +entry: + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + %l.2 = call <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr %src, i32 3, i1 false, i32 3, i32 3) + %l.3 = shufflevector <9 x double> %l.2, <9 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + call void @use(<8 x double> %l) + call void @use(<8 x double> %l.3) + ret void +} + +define void @repeat_load_dimension_change_shuffle(ptr %src) { +; CHECK-LABEL: define void @repeat_load_dimension_change_shuffle( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = call <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr [[SRC]], i32 3, i1 false, i32 3, i32 3) +; CHECK-NEXT: [[L_3:%.*]] = shufflevector <9 x double> [[L_2]], <9 x double> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: call void @use(<8 x double> [[L]]) +; CHECK-NEXT: call void @use(<8 x double> [[L_3]]) +; CHECK-NEXT: ret void +; +entry: + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + %l.2 = call <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr %src, i32 3, i1 false, i32 3, i32 3) + %l.3 = shufflevector <9 x double> %l.2, <9 x double> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + call void @use(<8 x double> %l) + call void @use(<8 x double> %l.3) + ret void +} + +declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, 
i1, i32, i32) +declare <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr, i32, i1, i32, i32) +declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32) +declare void @use(<8 x double>) diff --git a/llvm/test/Transforms/IROutliner/outlining-compatible-and-attribute-transfer.ll b/llvm/test/Transforms/IROutliner/outlining-compatible-and-attribute-transfer.ll index b3f2e81..15ce3e3 100644 --- a/llvm/test/Transforms/IROutliner/outlining-compatible-and-attribute-transfer.ll +++ b/llvm/test/Transforms/IROutliner/outlining-compatible-and-attribute-transfer.ll @@ -5,7 +5,7 @@ ; attributes that should be transferred only if it is on all of the regions. ; This includes the attributes, no-nans-fp-math, -; no-signed-zeros-fp-math, less-precise-fpmad, unsafe-fp-math, and +; no-signed-zeros-fp-math, less-precise-fpmad, and ; no-infs-fp-math. Only when each instance of similarity has these attributes ; can we say that the outlined function can have these attributes since that ; is the more general case for these attributes. @@ -101,7 +101,7 @@ entry: } attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "less-precise-fpmad"="true" -"unsafe-fp-math"="true" "no-infs-fp-math"="true"} +"no-infs-fp-math"="true"} ; CHECK: define internal void @outlined_ir_func_0(ptr [[ARG0:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]]) [[ATTR1:#[0-9]+]] { ; CHECK: entry_to_outline: @@ -122,5 +122,5 @@ attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "les ; CHECK-NEXT: [[CL:%.*]] = load i32, ptr [[ARG2]], align 4 -; CHECK: attributes [[ATTR1]] = { minsize optsize "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "unsafe-fp-math"="false" } -; CHECK: attributes [[ATTR]] = { minsize optsize "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "unsafe-fp-math"="true" } +; CHECK: attributes [[ATTR1]] = { minsize optsize "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" } +; CHECK: attributes [[ATTR]] = { minsize optsize "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" } diff --git a/llvm/test/Transforms/Inline/attributes.ll b/llvm/test/Transforms/Inline/attributes.ll index 55ab430..da7eeda 100644 --- a/llvm/test/Transforms/Inline/attributes.ll +++ b/llvm/test/Transforms/Inline/attributes.ll @@ -601,46 +601,6 @@ define i32 @test_no-signed-zeros-fp-math3(i32 %i) "no-signed-zeros-fp-math"="tru ; CHECK-NEXT: ret i32 } -define i32 @unsafe-fp-math_callee0(i32 %i) "unsafe-fp-math"="false" { - ret i32 %i -; CHECK: @unsafe-fp-math_callee0(i32 %i) [[UNSAFE_FPMATH_FALSE:#[0-9]+]] { -; CHECK-NEXT: ret i32 -} - -define i32 @unsafe-fp-math_callee1(i32 %i) "unsafe-fp-math"="true" { - ret i32 %i -; CHECK: @unsafe-fp-math_callee1(i32 %i) [[UNSAFE_FPMATH_TRUE:#[0-9]+]] { -; CHECK-NEXT: ret i32 -} - -define i32 @test_unsafe-fp-math0(i32 %i) "unsafe-fp-math"="false" { - %1 = call i32 @unsafe-fp-math_callee0(i32 %i) - ret i32 %1 -; CHECK: @test_unsafe-fp-math0(i32 %i) [[UNSAFE_FPMATH_FALSE]] { -; CHECK-NEXT: ret i32 -} - -define i32 @test_unsafe-fp-math1(i32 %i) "unsafe-fp-math"="false" { - %1 = call i32 @unsafe-fp-math_callee1(i32 %i) - ret i32 %1 -; CHECK: @test_unsafe-fp-math1(i32 %i) [[UNSAFE_FPMATH_FALSE]] { -; CHECK-NEXT: ret i32 -} - -define i32 @test_unsafe-fp-math2(i32 %i) "unsafe-fp-math"="true" { - %1 = call 
i32 @unsafe-fp-math_callee0(i32 %i) - ret i32 %1 -; CHECK: @test_unsafe-fp-math2(i32 %i) [[UNSAFE_FPMATH_FALSE]] { -; CHECK-NEXT: ret i32 -} - -define i32 @test_unsafe-fp-math3(i32 %i) "unsafe-fp-math"="true" { - %1 = call i32 @unsafe-fp-math_callee1(i32 %i) - ret i32 %1 -; CHECK: @test_unsafe-fp-math3(i32 %i) [[UNSAFE_FPMATH_TRUE]] { -; CHECK-NEXT: ret i32 -} - ; Test that fn_ret_thunk_extern has no CompatRule; inlining is permitted. ; Test that fn_ret_thunk_extern has no MergeRule; fn_ret_thunk_extern is not ; propagated or dropped on the caller after inlining. @@ -693,6 +653,4 @@ define i32 @loader_replaceable_caller() { ; CHECK: attributes [[NO_NANS_FPMATH_TRUE]] = { "no-nans-fp-math"="true" } ; CHECK: attributes [[NO_SIGNED_ZEROS_FPMATH_FALSE]] = { "no-signed-zeros-fp-math"="false" } ; CHECK: attributes [[NO_SIGNED_ZEROS_FPMATH_TRUE]] = { "no-signed-zeros-fp-math"="true" } -; CHECK: attributes [[UNSAFE_FPMATH_FALSE]] = { "unsafe-fp-math"="false" } -; CHECK: attributes [[UNSAFE_FPMATH_TRUE]] = { "unsafe-fp-math"="true" } ; CHECK: attributes [[FNRETTHUNK_EXTERN]] = { fn_ret_thunk_extern } diff --git a/llvm/test/Transforms/InstCombine/select-and-or.ll b/llvm/test/Transforms/InstCombine/select-and-or.ll index 453ca66..0b8eda4 100644 --- a/llvm/test/Transforms/InstCombine/select-and-or.ll +++ b/llvm/test/Transforms/InstCombine/select-and-or.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -passes=instcombine < %s | FileCheck %s declare void @use(i1) @@ -6,6 +6,10 @@ declare i1 @gen_i1() declare <2 x i1> @gen_v2i1() ; Should not be converted to "and", which has different poison semantics. +;. +; CHECK: @g1 = external global i16 +; CHECK: @g2 = external global i16 +;. define i1 @logical_and(i1 %a, i1 %b) { ; CHECK-LABEL: @logical_and( ; CHECK-NEXT: [[RES:%.*]] = select i1 [[A:%.*]], i1 [[B:%.*]], i1 false @@ -225,29 +229,29 @@ define i1 @not_not_true(i1 %x, i1 %y) { ; (!x && !y) --> !(x || y) -define i1 @not_not_false(i1 %x, i1 %y) { +define i1 @not_not_false(i1 %x, i1 %y) !prof !0 { ; CHECK-LABEL: @not_not_false( -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 true, i1 [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 true, i1 [[Y:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: [[R:%.*]] = xor i1 [[TMP1]], true ; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i1 %x, true %noty = xor i1 %y, true - %r = select i1 %notx, i1 %noty, i1 false + %r = select i1 %notx, i1 %noty, i1 false, !prof !1 ret i1 %r } ; (!x || !y) --> !(x && y) -define i1 @not_true_not(i1 %x, i1 %y) { +define i1 @not_true_not(i1 %x, i1 %y) !prof !0 { ; CHECK-LABEL: @not_true_not( -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 [[Y:%.*]], i1 false +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 [[Y:%.*]], i1 false, !prof [[PROF1]] ; CHECK-NEXT: [[R:%.*]] = xor i1 [[TMP1]], true ; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i1 %x, true %noty = xor i1 %y, true - %r = select i1 %notx, i1 true, i1 %noty + %r = select i1 %notx, i1 true, i1 %noty, !prof !1 ret i1 %r } @@ -1348,3 +1352,12 @@ define i8 @test_logical_commuted_and_ne_a_b(i1 %other_cond, i8 %a, i8 %b) { %select = select i1 %or.cond, i8 %a, i8 %b ret i8 %select } + +!0 = !{!"function_entry_count", i64 1000} +!1 = !{!"branch_weights", i32 2, i32 3} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +;. 
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 2} +;. diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll index d88eaf8..3d97048 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll @@ -58,15 +58,15 @@ define i1 @cond_eq_or_const(i8 %X, i8 %Y) !prof !0 { ret i1 %res } -define i1 @xor_and(i1 %c, i32 %X, i32 %Y) { +define i1 @xor_and(i1 %c, i32 %X, i32 %Y) !prof !0 { ; CHECK-LABEL: @xor_and( ; CHECK-NEXT: [[COMP:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 true, i1 [[COMP]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 true, i1 [[COMP]], !prof [[PROF2:![0-9]+]] ; CHECK-NEXT: ret i1 [[SEL]] ; %comp = icmp ult i32 %X, %Y - %sel = select i1 %c, i1 %comp, i1 false + %sel = select i1 %c, i1 %comp, i1 false, !prof !1 %res = xor i1 %sel, true ret i1 %res } @@ -97,15 +97,15 @@ define <2 x i1> @xor_and3(<2 x i1> %c, <2 x i32> %X, <2 x i32> %Y) { ret <2 x i1> %res } -define i1 @xor_or(i1 %c, i32 %X, i32 %Y) { +define i1 @xor_or(i1 %c, i32 %X, i32 %Y) !prof !0 { ; CHECK-LABEL: @xor_or( ; CHECK-NEXT: [[COMP:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 [[COMP]], i1 false +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 [[COMP]], i1 false, !prof [[PROF2]] ; CHECK-NEXT: ret i1 [[SEL]] ; %comp = icmp ult i32 %X, %Y - %sel = select i1 %c, i1 true, i1 %comp + %sel = select i1 %c, i1 true, i1 %comp, !prof !1 %res = xor i1 %sel, true ret i1 %res } @@ -802,4 +802,5 @@ define <2 x i1> @not_logical_and2(i1 %b, <2 x i32> %a) { ;. ; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 2} ;. 
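Both InstCombine diffs above pin down the same profile invariant: when a select is rewritten so that its new condition is the negation of the original one, the two branch_weights operands trade places, which is why the !{!"branch_weights", i32 2, i32 3} metadata on the input selects reappears as !{!"branch_weights", i32 3, i32 2} in the PROF1/PROF2 checks. Below is a minimal standalone sketch of that invariant in LLVM IR; the function name @prof_swap_sketch is hypothetical and not part of the patch:

define i1 @prof_swap_sketch(i1 %x, i1 %y) {
entry:
  %notx = xor i1 %x, true
  ; Logical or spelled with the negated condition: the weights say the
  ; true arm (i1 true) is selected 2 times for every 3 times %y is selected.
  %r = select i1 %notx, i1 true, i1 %y, !prof !0
  ret i1 %r
}

!0 = !{!"branch_weights", i32 2, i32 3}

Folding the xor into the condition yields the equivalent select i1 %x, i1 %y, i1 true; because the true and false arms swap, a profile-preserving rewrite has to attach !{!"branch_weights", i32 3, i32 2}, exactly the relationship the updated checks assert.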
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll index 829acbbf..305a692 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll @@ -210,3 +210,175 @@ loop: exit: ret void } + +define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst) { +; IC1-LABEL: define void @test_masked_interleave_group( +; IC1-SAME: i32 [[N:%.*]], ptr [[MASK:%.*]], ptr [[SRC:%.*]], ptr [[DST:%.*]]) #[[ATTR0]] { +; IC1-NEXT: [[ENTRY:.*:]] +; IC1-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 +; IC1-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1 +; IC1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; IC1-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 +; IC1-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP3]], i64 8) +; IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[UMAX]] +; IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; IC1: [[VECTOR_MEMCHECK]]: +; IC1-NEXT: [[TMP4:%.*]] = zext i32 [[N]] to i64 +; IC1-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 +; IC1-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 16 +; IC1-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]] +; IC1-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP4]], 1 +; IC1-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[MASK]], i64 [[TMP7]] +; IC1-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP6]] +; IC1-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; IC1-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[MASK]], [[SCEVGEP]] +; IC1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; IC1-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]] +; IC1-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] +; IC1-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] +; IC1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] +; IC1-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; IC1: [[VECTOR_PH]]: +; IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; IC1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 +; IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP9]] +; IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; IC1-NEXT: [[TMP10:%.*]] = trunc i64 [[N_VEC]] to i32 +; IC1-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], 16 +; IC1-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]] +; IC1-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 16 +; IC1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]] +; IC1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[MASK]], i64 [[N_VEC]] +; IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; IC1: [[VECTOR_BODY]]: +; IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC1-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 +; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]] +; IC1-NEXT: [[OFFSET_IDX6:%.*]] = mul i64 [[INDEX]], 16 +; IC1-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX6]] +; IC1-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[MASK]], i64 [[INDEX]] +; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[NEXT_GEP8]], align 1, !alias.scope [[META6:![0-9]+]] +; IC1-NEXT: 
[[TMP16:%.*]] = icmp eq <vscale x 4 x i8> [[WIDE_LOAD]], zeroinitializer +; IC1-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 16 x i1> @llvm.vector.interleave4.nxv16i1(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP16]]) +; IC1-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 16 x float> @llvm.masked.load.nxv16f32.p0(ptr align 4 [[NEXT_GEP7]], <vscale x 16 x i1> [[INTERLEAVED_MASK]], <vscale x 16 x float> poison), !alias.scope [[META9:![0-9]+]] +; IC1-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave4.nxv16f32(<vscale x 16 x float> [[WIDE_MASKED_VEC]]) +; IC1-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0 +; IC1-NEXT: [[TMP18:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1 +; IC1-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 2 +; IC1-NEXT: [[TMP20:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 3 +; IC1-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 16 x float> @llvm.vector.interleave4.nxv16f32(<vscale x 4 x float> [[TMP17]], <vscale x 4 x float> [[TMP18]], <vscale x 4 x float> [[TMP19]], <vscale x 4 x float> [[TMP20]]) +; IC1-NEXT: [[INTERLEAVED_MASK9:%.*]] = call <vscale x 16 x i1> @llvm.vector.interleave4.nxv16i1(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP16]]) +; IC1-NEXT: call void @llvm.masked.store.nxv16f32.p0(<vscale x 16 x float> [[INTERLEAVED_VEC]], ptr align 4 [[NEXT_GEP]], <vscale x 16 x i1> [[INTERLEAVED_MASK9]]), !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]] +; IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; IC1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IC1-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; IC1: [[MIDDLE_BLOCK]]: +; IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; IC1-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; IC1: [[SCALAR_PH]]: +; +; CHECK-LABEL: define void @test_masked_interleave_group( +; CHECK-SAME: i32 [[N:%.*]], ptr [[MASK:%.*]], ptr [[SRC:%.*]], ptr [[DST:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP3]], i64 8) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[UMAX]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 16 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP4]], 1 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[MASK]], i64 [[TMP7]] +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], 
i64 [[TMP6]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[MASK]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]] +; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP9]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 16 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[MASK]], i64 [[N_VEC]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[OFFSET_IDX6:%.*]] = mul i64 [[INDEX]], 16 +; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX6]] +; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[MASK]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[NEXT_GEP8]], align 1, !alias.scope [[META6:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <vscale x 4 x i8> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 16 x i1> @llvm.vector.interleave4.nxv16i1(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP16]]) +; CHECK-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 16 x float> @llvm.masked.load.nxv16f32.p0(ptr align 4 [[NEXT_GEP7]], <vscale x 16 x i1> [[INTERLEAVED_MASK]], <vscale x 16 x float> poison), !alias.scope [[META9:![0-9]+]] +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave4.nxv16f32(<vscale x 16 x float> [[WIDE_MASKED_VEC]]) +; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 2 +; CHECK-NEXT: [[TMP20:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 3 +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 16 x float> @llvm.vector.interleave4.nxv16f32(<vscale x 4 x float> [[TMP17]], <vscale x 4 x float> [[TMP18]], <vscale x 4 x float> [[TMP19]], <vscale x 4 x float> [[TMP20]]) +; CHECK-NEXT: [[INTERLEAVED_MASK9:%.*]] = call <vscale x 
16 x i1> @llvm.vector.interleave4.nxv16i1(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP16]]) +; CHECK-NEXT: call void @llvm.masked.store.nxv16f32.p0(<vscale x 16 x float> [[INTERLEAVED_VEC]], ptr align 4 [[NEXT_GEP]], <vscale x 16 x i1> [[INTERLEAVED_MASK9]]), !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %dst.iv = phi ptr [ %dst, %entry ], [ %dst.iv.next, %loop.latch ] + %src.iv = phi ptr [ %src, %entry ], [ %src.iv.next, %loop.latch ] + %mask.iv = phi ptr [ %mask, %entry ], [ %mask.iv.next, %loop.latch ] + %mask.iv.next = getelementptr i8, ptr %mask.iv, i64 1 + %mask.val = load i8, ptr %mask.iv, align 1 + %should.copy = icmp eq i8 %mask.val, 0 + br i1 %should.copy, label %then, label %loop.latch + +then: + %elem0 = load float, ptr %src.iv, align 4 + store float %elem0, ptr %dst.iv, align 4 + %src.1.ptr = getelementptr i8, ptr %src.iv, i64 4 + %s1 = load float, ptr %src.1.ptr, align 4 + %dst.1.ptr = getelementptr i8, ptr %dst.iv, i64 4 + store float %s1, ptr %dst.1.ptr, align 4 + %src.2.ptr = getelementptr i8, ptr %src.iv, i64 8 + %s2 = load float, ptr %src.2.ptr, align 4 + %dst.2.ptr = getelementptr i8, ptr %dst.iv, i64 8 + store float %s2, ptr %dst.2.ptr, align 4 + %src.3.ptr = getelementptr i8, ptr %src.iv, i64 12 + %s3 = load float, ptr %src.3.ptr, align 4 + %dst.3.ptr = getelementptr i8, ptr %dst.iv, i64 12 + store float %s3, ptr %dst.3.ptr, align 4 + br label %loop.latch + +loop.latch: + %iv.next = add i32 %iv, 1 + %src.iv.next = getelementptr i8, ptr %src.iv, i64 16 + %dst.iv.next = getelementptr i8, ptr %dst.iv, i64 16 + %ec = icmp eq i32 %iv, %N + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll index c8d20dc..e42e2c7 100644 --- a/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll @@ -7,6 +7,7 @@ target triple = "wasm32-unknown-wasi" %struct.TwoInts = type { i32, i32 } %struct.ThreeInts = type { i32, i32, i32 } %struct.FourInts = type { i32, i32, i32, i32 } +%struct.TwoShorts = type { i16, i16 } %struct.ThreeShorts = type { i16, i16, i16 } %struct.FourShorts = type { i16, i16, i16, i16 } %struct.TwoBytes = type { i8, i8 } @@ -14,6 +15,8 @@ target triple = "wasm32-unknown-wasi" %struct.FourBytes = type { i8, i8, i8, i8 } %struct.FiveBytes = type { i8, i8, i8, i8, i8 } %struct.EightBytes = type { i8, i8, i8, i8, i8, i8, i8, i8 } +%struct.TwoFloats = type { float, float } +%struct.FourFloats = type { float, float, float, float } ; CHECK-LABEL: two_ints_same_op ; CHECK: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 2 at %10 @@ -1350,3 +1353,1000 @@ define hidden void @scale_uv_row_down2_linear(ptr nocapture noundef readonly %0, 34: ; preds = %6, %4 ret void } + +; CHECK-LABEL: two_floats_same_op +; CHECK: LV: 
Scalar loop costs: 14 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: LV: Scalar loop costs: 14. +; CHECK: LV: Vector loop of width 2 costs: 19. +; CHECK: LV: Vector loop of width 4 costs: 15. +; CHECK: LV: Selecting VF: 1. +define hidden void @two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp21.not = icmp eq i32 %N, 0 + br i1 %cmp21.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.022 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.022 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.022 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %arrayidx3 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.022 + store float %mul, ptr %arrayidx3, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %mul8 = fmul float %2, %3 + %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4 + store float %mul8, ptr %y10, align 4 + %inc = add nuw i32 %i.022, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_floats_vary_op +; CHECK: LV: Scalar loop costs: 14 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: LV: Scalar loop costs: 14. +; CHECK: LV: Vector loop of width 2 costs: 19. +; CHECK: LV: Vector loop of width 4 costs: 15. +; CHECK: LV: Selecting VF: 1. 
+define hidden void @two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp20.not = icmp eq i32 %N, 0 + br i1 %cmp20.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.021 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.021 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.021 + %1 = load float, ptr %arrayidx1, align 4 + %add = fadd float %0, %1 + %arrayidx3 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.021 + store float %add, ptr %arrayidx3, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %sub = fsub float %2, %3 + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4 + store float %sub, ptr %y9, align 4 + %inc = add nuw i32 %i.021, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_bytes_two_floats_same_op +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: LV: Scalar loop costs: 18 +; CHECK: LV: Vector loop of width 2 costs: 23 +; CHECK: LV: Vector loop of width 4 costs: 13 +; CHECK: LV: Selecting VF: 4. +define hidden void @two_bytes_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp24.not = icmp eq i32 %N, 0 + br i1 %cmp24.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.025 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoBytes, ptr %a, i32 %i.025 + %0 = load i8, ptr %arrayidx, align 1 + %conv = sitofp i8 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.TwoBytes, ptr %b, i32 %i.025 + %1 = load i8, ptr %arrayidx1, align 1 + %conv3 = sitofp i8 %1 to float + %mul = fmul float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.025 + store float %mul, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1 + %2 = load i8, ptr %y, align 1 + %conv7 = sitofp i8 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1 + %3 = load i8, ptr %y9, align 1 + %conv10 = sitofp i8 %3 to float + %mul11 = fmul float %conv7, %conv10 + %y13 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %mul11, ptr %y13, align 4 + %inc = add nuw i32 %i.025, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_bytes_two_floats_vary_op +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: LV: Scalar loop costs: 18 +; CHECK: LV: Vector loop of width 2 
costs: 23 +; CHECK: LV: Vector loop of width 4 costs: 13 +; CHECK: LV: Selecting VF: 4. +define hidden void @two_bytes_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp23.not = icmp eq i32 %N, 0 + br i1 %cmp23.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.024 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoBytes, ptr %a, i32 %i.024 + %0 = load i8, ptr %arrayidx, align 1 + %conv = sitofp i8 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.TwoBytes, ptr %b, i32 %i.024 + %1 = load i8, ptr %arrayidx1, align 1 + %conv3 = sitofp i8 %1 to float + %add = fadd float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.024 + store float %add, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1 + %2 = load i8, ptr %y, align 1 + %conv7 = sitofp i8 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1 + %3 = load i8, ptr %y9, align 1 + %conv10 = sitofp i8 %3 to float + %sub = fsub float %conv7, %conv10 + %y12 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %sub, ptr %y12, align 4 + %inc = add nuw i32 %i.024, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_floats_two_bytes_same_op +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: LV: Scalar loop costs: 16 +; CHECK: LV: Vector loop of width 2 costs: 21 +; CHECK: LV: Vector loop of width 4 costs: 14. +; CHECK: LV: Selecting VF: 4. 
+define hidden void @two_floats_two_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp22.not = icmp eq i32 %N, 0 + br i1 %cmp22.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.023 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.023 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %conv = fptosi float %mul to i8 + %arrayidx3 = getelementptr inbounds nuw %struct.TwoBytes, ptr %res, i32 %i.023 + store i8 %conv, ptr %arrayidx3, align 1 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %mul8 = fmul float %2, %3 + %conv9 = fptosi float %mul8 to i8 + %y11 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 1 + store i8 %conv9, ptr %y11, align 1 + %inc = add nuw i32 %i.023, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_floats_two_bytes_vary_op +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: LV: Scalar loop costs: 16 +; CHECK: LV: Vector loop of width 2 costs: 21 +; CHECK: LV: Vector loop of width 4 costs: 14. +; CHECK: LV: Selecting VF: 4. 
+define hidden void @two_floats_two_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp21.not = icmp eq i32 %N, 0 + br i1 %cmp21.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.022 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.022 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.022 + %1 = load float, ptr %arrayidx1, align 4 + %add = fadd float %0, %1 + %conv = fptosi float %add to i8 + %arrayidx3 = getelementptr inbounds nuw %struct.TwoBytes, ptr %res, i32 %i.022 + store i8 %conv, ptr %arrayidx3, align 1 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %sub = fsub float %2, %3 + %conv8 = fptosi float %sub to i8 + %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 1 + store i8 %conv8, ptr %y10, align 1 + %inc = add nuw i32 %i.022, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_shorts_two_floats_same_op +; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: LV: Scalar loop costs: 18 +; CHECK: LV: Vector loop of width 2 costs: 22 +; CHECK: LV: Vector loop of width 4 costs: 11. +; CHECK: LV: Selecting VF: 4. 
+define hidden void @two_shorts_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp24.not = icmp eq i32 %N, 0 + br i1 %cmp24.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.025 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoShorts, ptr %a, i32 %i.025 + %0 = load i16, ptr %arrayidx, align 2 + %conv = sitofp i16 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.TwoShorts, ptr %b, i32 %i.025 + %1 = load i16, ptr %arrayidx1, align 2 + %conv3 = sitofp i16 %1 to float + %mul = fmul float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.025 + store float %mul, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 + %2 = load i16, ptr %y, align 2 + %conv7 = sitofp i16 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2 + %3 = load i16, ptr %y9, align 2 + %conv10 = sitofp i16 %3 to float + %mul11 = fmul float %conv7, %conv10 + %y13 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %mul11, ptr %y13, align 4 + %inc = add nuw i32 %i.025, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_shorts_two_floats_vary_op +; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: LV: Scalar loop costs: 18 +; CHECK: LV: Vector loop of width 2 costs: 22 +; CHECK: LV: Vector loop of width 4 costs: 11. +; CHECK: LV: Selecting VF: 4. 
+define hidden void @two_shorts_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp23.not = icmp eq i32 %N, 0 + br i1 %cmp23.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.024 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoShorts, ptr %a, i32 %i.024 + %0 = load i16, ptr %arrayidx, align 2 + %conv = sitofp i16 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.TwoShorts, ptr %b, i32 %i.024 + %1 = load i16, ptr %arrayidx1, align 2 + %conv3 = sitofp i16 %1 to float + %add = fadd float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.024 + store float %add, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 + %2 = load i16, ptr %y, align 2 + %conv7 = sitofp i16 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2 + %3 = load i16, ptr %y9, align 2 + %conv10 = sitofp i16 %3 to float + %sub = fsub float %conv7, %conv10 + %y12 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %sub, ptr %y12, align 4 + %inc = add nuw i32 %i.024, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_floats_two_shorts_same_op +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: LV: Scalar loop costs: 16 +; CHECK: LV: Vector loop of width 2 costs: 20 +; CHECK: LV: Vector loop of width 4 costs: 13. +; CHECK: LV: Selecting VF: 4. 
+define hidden void @two_floats_two_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp22.not = icmp eq i32 %N, 0 + br i1 %cmp22.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.023 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.023 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %conv = fptosi float %mul to i16 + %arrayidx3 = getelementptr inbounds nuw %struct.TwoShorts, ptr %res, i32 %i.023 + store i16 %conv, ptr %arrayidx3, align 2 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %mul8 = fmul float %2, %3 + %conv9 = fptosi float %mul8 to i16 + %y11 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 2 + store i16 %conv9, ptr %y11, align 2 + %inc = add nuw i32 %i.023, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: two_floats_two_shorts_vary_op +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 +; CHECK: LV: Scalar loop costs: 16 +; CHECK: LV: Vector loop of width 2 costs: 20 +; CHECK: LV: Vector loop of width 4 costs: 13. +; CHECK: LV: Selecting VF: 4. 
+define hidden void @two_floats_two_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp21.not = icmp eq i32 %N, 0 + br i1 %cmp21.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.022 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.022 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.022 + %1 = load float, ptr %arrayidx1, align 4 + %add = fadd float %0, %1 + %conv = fptosi float %add to i16 + %arrayidx3 = getelementptr inbounds nuw %struct.TwoShorts, ptr %res, i32 %i.022 + store i16 %conv, ptr %arrayidx3, align 2 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %sub = fsub float %2, %3 + %conv8 = fptosi float %sub to i16 + %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 2 + store i16 %conv8, ptr %y10, align 2 + %inc = add nuw i32 %i.022, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_floats_same_op +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: LV: Scalar loop costs: 24 +; CHECK: LV: Vector loop of width 2 costs: 33 +; CHECK: LV: Vector loop of width 4 costs: 30 +; CHECK: LV: Selecting VF: 4 +define hidden void @four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp45.not = icmp eq i32 %N, 0 + br i1 %cmp45.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.046 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourFloats, ptr %a, i32 %i.046 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.FourFloats, ptr %b, i32 %i.046 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %arrayidx3 = getelementptr inbounds nuw %struct.FourFloats, ptr %res, i32 %i.046 + store float %mul, ptr %arrayidx3, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %mul8 = fmul float %2, %3 + %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4 + store float %mul8, ptr %y10, align 4 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 8 + %4 = load float, ptr %z, align 4 + %z13 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 8 + %5 = load float, ptr %z13, align 4 + %mul14 = fmul float %4, %5 + %z16 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 8 + store float %mul14, ptr %z16, align 4 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 12 + %6 = load float, ptr %w, align 4 + %w19 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 12 + %7 = load float, ptr %w19, align 4 + %mul20 = fmul float %6, %7 + %w22 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 12 + store float %mul20, ptr 
%w22, align 4 + %inc = add nuw i32 %i.046, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_floats_vary_op +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: LV: Scalar loop costs: 24 +; CHECK: LV: Vector loop of width 2 costs: 33 +; CHECK: LV: Vector loop of width 4 costs: 30 +; CHECK: LV: Selecting VF: 1 +define hidden void @four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp42.not = icmp eq i32 %N, 0 + br i1 %cmp42.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.043 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourFloats, ptr %a, i32 %i.043 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.FourFloats, ptr %b, i32 %i.043 + %1 = load float, ptr %arrayidx1, align 4 + %add = fadd float %0, %1 + %arrayidx3 = getelementptr inbounds nuw %struct.FourFloats, ptr %res, i32 %i.043 + store float %add, ptr %arrayidx3, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %sub = fsub float %2, %3 + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4 + store float %sub, ptr %y9, align 4 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 8 + %4 = load float, ptr %z, align 4 + %z12 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 8 + %5 = load float, ptr %z12, align 4 + %mul = fmul float %4, %5 + %z14 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 8 + store float %mul, ptr %z14, align 4 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 12 + %6 = load float, ptr %w, align 4 + %w17 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 12 + %7 = load float, ptr %w17, align 4 + %div = fdiv float %6, %7 + %w19 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 12 + store float %div, ptr %w19, align 4 + %inc = add nuw i32 %i.043, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_bytes_four_floats_same_op +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: LV: Scalar loop costs: 32 +; CHECK: LV: Vector loop of width 2 costs: 43 +; CHECK: LV: Vector loop of width 4 costs: 23 +; CHECK: LV: Selecting VF: 4 +define hidden void @four_bytes_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp52.not = icmp eq i32 %N, 0 + br i1 %cmp52.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.053 = phi i32 [ %inc, %for.body ], [ 0, 
%entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourBytes, ptr %a, i32 %i.053 + %0 = load i8, ptr %arrayidx, align 1 + %conv = sitofp i8 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.FourBytes, ptr %b, i32 %i.053 + %1 = load i8, ptr %arrayidx1, align 1 + %conv3 = sitofp i8 %1 to float + %mul = fmul float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.FourFloats, ptr %res, i32 %i.053 + store float %mul, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1 + %2 = load i8, ptr %y, align 1 + %conv7 = sitofp i8 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1 + %3 = load i8, ptr %y9, align 1 + %conv10 = sitofp i8 %3 to float + %mul11 = fmul float %conv7, %conv10 + %y13 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %mul11, ptr %y13, align 4 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 + %4 = load i8, ptr %z, align 1 + %conv15 = sitofp i8 %4 to float + %z17 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2 + %5 = load i8, ptr %z17, align 1 + %conv18 = sitofp i8 %5 to float + %mul19 = fmul float %conv15, %conv18 + %z21 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 8 + store float %mul19, ptr %z21, align 4 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 3 + %6 = load i8, ptr %w, align 1 + %conv23 = sitofp i8 %6 to float + %w25 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 3 + %7 = load i8, ptr %w25, align 1 + %conv26 = sitofp i8 %7 to float + %mul27 = fmul float %conv23, %conv26 + %w29 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 12 + store float %mul27, ptr %w29, align 4 + %inc = add nuw i32 %i.053, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_bytes_four_floats_vary_op +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: LV: Scalar loop costs: 32 +; CHECK: LV: Vector loop of width 2 costs: 43 +; CHECK: LV: Vector loop of width 4 costs: 23 +; CHECK: LV: Selecting VF: 4 +define hidden void @four_bytes_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp49.not = icmp eq i32 %N, 0 + br i1 %cmp49.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.050 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourBytes, ptr %a, i32 %i.050 + %0 = load i8, ptr %arrayidx, align 1 + %conv = sitofp i8 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.FourBytes, ptr %b, i32 %i.050 + %1 = load i8, ptr %arrayidx1, align 1 + %conv3 = sitofp i8 %1 to float + %mul = fmul float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.FourFloats, ptr %res, i32 %i.050 + store float %mul, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1 + %2 = load i8, ptr %y, align 1 + %conv7 = sitofp i8 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1 + %3 = load i8, ptr %y9, align 1 + %conv10 = sitofp i8 %3 to float + %add = fadd float %conv7, %conv10 + %y12 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %add, ptr %y12, align 4 
+ %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 + %4 = load i8, ptr %z, align 1 + %conv14 = sitofp i8 %4 to float + %z16 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2 + %5 = load i8, ptr %z16, align 1 + %conv17 = sitofp i8 %5 to float + %div = fdiv float %conv14, %conv17 + %z19 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 8 + store float %div, ptr %z19, align 4 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 3 + %6 = load i8, ptr %w, align 1 + %conv21 = sitofp i8 %6 to float + %w23 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 3 + %7 = load i8, ptr %w23, align 1 + %conv24 = sitofp i8 %7 to float + %sub = fsub float %conv21, %conv24 + %w26 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 12 + store float %sub, ptr %w26, align 4 + %inc = add nuw i32 %i.050, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_floats_four_bytes_same_op +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: LV: Scalar loop costs: 28 +; CHECK: LV: Vector loop of width 2 costs: 38 +; CHECK: LV: Vector loop of width 4 costs: 26 +; CHECK: LV: Selecting VF: 4 +define hidden void @four_floats_four_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp48.not = icmp eq i32 %N, 0 + br i1 %cmp48.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.049 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourFloats, ptr %a, i32 %i.049 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.FourFloats, ptr %b, i32 %i.049 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %conv = fptosi float %mul to i8 + %arrayidx3 = getelementptr inbounds nuw %struct.FourBytes, ptr %res, i32 %i.049 + store i8 %conv, ptr %arrayidx3, align 1 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %mul8 = fmul float %2, %3 + %conv9 = fptosi float %mul8 to i8 + %y11 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 1 + store i8 %conv9, ptr %y11, align 1 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 8 + %4 = load float, ptr %z, align 4 + %z14 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 8 + %5 = load float, ptr %z14, align 4 + %mul15 = fmul float %4, %5 + %conv16 = fptosi float %mul15 to i8 + %z18 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 2 + store i8 %conv16, ptr %z18, align 1 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 12 + %6 = load float, ptr %w, align 4 + %w21 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 12 + %7 = load float, ptr %w21, align 4 + %mul22 = fmul float %6, %7 + %conv23 = fptosi float %mul22 to i8 + %w25 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 3 + store i8 %conv23, ptr %w25, align 1 + %inc = add nuw i32 %i.049, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: 
four_floats_four_bytes_vary_op +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: LV: Scalar loop costs: 28 +; CHECK: LV: Vector loop of width 2 costs: 38 +; CHECK: LV: Vector loop of width 4 costs: 26 +; CHECK: LV: Selecting VF: 4 +define hidden void @four_floats_four_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp45.not = icmp eq i32 %N, 0 + br i1 %cmp45.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.046 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourFloats, ptr %a, i32 %i.046 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.FourFloats, ptr %b, i32 %i.046 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %conv = fptosi float %mul to i8 + %arrayidx3 = getelementptr inbounds nuw %struct.FourBytes, ptr %res, i32 %i.046 + store i8 %conv, ptr %arrayidx3, align 1 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %add = fadd float %2, %3 + %conv8 = fptosi float %add to i8 + %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 1 + store i8 %conv8, ptr %y10, align 1 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 8 + %4 = load float, ptr %z, align 4 + %z13 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 8 + %5 = load float, ptr %z13, align 4 + %div = fdiv float %4, %5 + %conv14 = fptosi float %div to i8 + %z16 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 2 + store i8 %conv14, ptr %z16, align 1 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 12 + %6 = load float, ptr %w, align 4 + %w19 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 12 + %7 = load float, ptr %w19, align 4 + %sub = fsub float %6, %7 + %conv20 = fptosi float %sub to i8 + %w22 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 3 + store i8 %conv20, ptr %w22, align 1 + %inc = add nuw i32 %i.046, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_shorts_four_floats_same_op +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: LV: Scalar loop costs: 32 +; CHECK: LV: Vector loop of width 2 costs: 37 +; CHECK: LV: Vector loop of width 4 costs: 23 +; CHECK: LV: Selecting VF: 4 +define hidden void @four_shorts_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp52.not = icmp eq i32 %N, 0 + br i1 %cmp52.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, 
%for.body + %i.053 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourShorts, ptr %a, i32 %i.053 + %0 = load i16, ptr %arrayidx, align 2 + %conv = sitofp i16 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.FourShorts, ptr %b, i32 %i.053 + %1 = load i16, ptr %arrayidx1, align 2 + %conv3 = sitofp i16 %1 to float + %mul = fmul float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.FourFloats, ptr %res, i32 %i.053 + store float %mul, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 + %2 = load i16, ptr %y, align 2 + %conv7 = sitofp i16 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2 + %3 = load i16, ptr %y9, align 2 + %conv10 = sitofp i16 %3 to float + %mul11 = fmul float %conv7, %conv10 + %y13 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %mul11, ptr %y13, align 4 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %4 = load i16, ptr %z, align 2 + %conv15 = sitofp i16 %4 to float + %z17 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %5 = load i16, ptr %z17, align 2 + %conv18 = sitofp i16 %5 to float + %mul19 = fmul float %conv15, %conv18 + %z21 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 8 + store float %mul19, ptr %z21, align 4 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 6 + %6 = load i16, ptr %w, align 2 + %conv23 = sitofp i16 %6 to float + %w25 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 6 + %7 = load i16, ptr %w25, align 2 + %conv26 = sitofp i16 %7 to float + %mul27 = fmul float %conv23, %conv26 + %w29 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 12 + store float %mul27, ptr %w29, align 4 + %inc = add nuw i32 %i.053, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_shorts_four_floats_vary_op +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: LV: Scalar loop costs: 32 +; CHECK: LV: Vector loop of width 2 costs: 37 +; CHECK: LV: Vector loop of width 4 costs: 23 +; CHECK: LV: Selecting VF: 4 +define hidden void @four_shorts_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp49.not = icmp eq i32 %N, 0 + br i1 %cmp49.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.050 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourShorts, ptr %a, i32 %i.050 + %0 = load i16, ptr %arrayidx, align 2 + %conv = sitofp i16 %0 to float + %arrayidx1 = getelementptr inbounds nuw %struct.FourShorts, ptr %b, i32 %i.050 + %1 = load i16, ptr %arrayidx1, align 2 + %conv3 = sitofp i16 %1 to float + %mul = fmul float %conv, %conv3 + %arrayidx4 = getelementptr inbounds nuw %struct.FourFloats, ptr %res, i32 %i.050 + store float %mul, ptr %arrayidx4, align 4 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 + %2 = load i16, ptr %y, align 2 + %conv7 = sitofp i16 %2 to float + %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, 
i32 2 + %3 = load i16, ptr %y9, align 2 + %conv10 = sitofp i16 %3 to float + %add = fadd float %conv7, %conv10 + %y12 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4 + store float %add, ptr %y12, align 4 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %4 = load i16, ptr %z, align 2 + %conv14 = sitofp i16 %4 to float + %z16 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %5 = load i16, ptr %z16, align 2 + %conv17 = sitofp i16 %5 to float + %div = fdiv float %conv14, %conv17 + %z19 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 8 + store float %div, ptr %z19, align 4 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 6 + %6 = load i16, ptr %w, align 2 + %conv21 = sitofp i16 %6 to float + %w23 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 6 + %7 = load i16, ptr %w23, align 2 + %conv24 = sitofp i16 %7 to float + %sub = fsub float %conv21, %conv24 + %w26 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 12 + store float %sub, ptr %w26, align 4 + %inc = add nuw i32 %i.050, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_floats_four_shorts_same_op +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: LV: Scalar loop costs: 28 +; CHECK: LV: Vector loop of width 2 costs: 35 +; CHECK: LV: Vector loop of width 4 costs: 26 +; CHECK: LV: Selecting VF: 4 +define hidden void @four_floats_four_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp48.not = icmp eq i32 %N, 0 + br i1 %cmp48.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.049 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourFloats, ptr %a, i32 %i.049 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.FourFloats, ptr %b, i32 %i.049 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %conv = fptosi float %mul to i16 + %arrayidx3 = getelementptr inbounds nuw %struct.FourShorts, ptr %res, i32 %i.049 + store i16 %conv, ptr %arrayidx3, align 2 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %mul8 = fmul float %2, %3 + %conv9 = fptosi float %mul8 to i16 + %y11 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 2 + store i16 %conv9, ptr %y11, align 2 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 8 + %4 = load float, ptr %z, align 4 + %z14 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 8 + %5 = load float, ptr %z14, align 4 + %mul15 = fmul float %4, %5 + %conv16 = fptosi float %mul15 to i16 + %z18 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4 + store i16 %conv16, ptr %z18, align 2 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 12 + %6 = load float, ptr %w, align 4 + %w21 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 12 + %7 = load float, ptr %w21, align 4 + %mul22 = fmul float %6, %7 + 
%conv23 = fptosi float %mul22 to i16 + %w25 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 6 + store i16 %conv23, ptr %w25, align 2 + %inc = add nuw i32 %i.049, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: four_floats_four_shorts_vary_op +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 +; CHECK: LV: Scalar loop costs: 28 +; CHECK: LV: Vector loop of width 2 costs: 35 +; CHECK: LV: Vector loop of width 4 costs: 26 +; CHECK: LV: Selecting VF: 4 +define hidden void @four_floats_four_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { +entry: + %cmp45.not = icmp eq i32 %N, 0 + br i1 %cmp45.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.046 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw %struct.FourFloats, ptr %a, i32 %i.046 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds nuw %struct.FourFloats, ptr %b, i32 %i.046 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul float %0, %1 + %conv = fptosi float %mul to i16 + %arrayidx3 = getelementptr inbounds nuw %struct.FourShorts, ptr %res, i32 %i.046 + store i16 %conv, ptr %arrayidx3, align 2 + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4 + %2 = load float, ptr %y, align 4 + %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4 + %3 = load float, ptr %y7, align 4 + %add = fadd float %2, %3 + %conv8 = fptosi float %add to i16 + %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 2 + store i16 %conv8, ptr %y10, align 2 + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i32 8 + %4 = load float, ptr %z, align 4 + %z13 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 8 + %5 = load float, ptr %z13, align 4 + %div = fdiv float %4, %5 + %conv14 = fptosi float %div to i16 + %z16 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4 + store i16 %conv14, ptr %z16, align 2 + %w = getelementptr inbounds nuw i8, ptr %arrayidx, i32 12 + %6 = load float, ptr %w, align 4 + %w19 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 12 + %7 = load float, ptr %w19, align 4 + %sub = fsub float %6, %7 + %conv20 = fptosi float %sub to i16 + %w22 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 6 + store i16 %conv20, ptr %w22, align 2 + %inc = add nuw i32 %i.046, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} diff --git a/llvm/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll b/llvm/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll index 0f18dc2..46e38d9 100644 --- a/llvm/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll +++ b/llvm/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -hoist-common-insts=true -enable-unsafe-fp-math -S >%t +; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -hoist-common-insts=true -S >%t ; RUN: 
FileCheck %s < %t ; ModuleID = 't.cc' diff --git a/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll index c7bc43e1..b61d659 100644 --- a/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll +++ b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=powerpc64le-unknown-linux-gnu -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -hoist-common-insts=true -enable-unsafe-fp-math -S | \ +; RUN: opt < %s -mtriple=powerpc64le-unknown-linux-gnu -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -hoist-common-insts=true -S | \ ; RUN: FileCheck %s ; This case is copied from test/Transforms/SimplifyCFG/AArch64/ diff --git a/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll new file mode 100644 index 0000000..10566ae --- /dev/null +++ b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll @@ -0,0 +1,132 @@ +; -stats requires asserts +; REQUIRES: asserts + +; Check that we can still devirtualize outside LTO mode when speculative devirtualization is enabled. +; Check that we skip devirtualization for empty functions in speculative devirtualization mode. + +; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively \ +; RUN: -pass-remarks=wholeprogramdevirt -stats %s 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: remark: devirt-single.cc:30:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:41:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:51:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:13:0: devirtualized vf +; CHECK-NOT: devirtualized + +@vt1 = constant [1 x ptr] [ptr @vf], !type !8 +@vt2 = constant [1 x ptr] [ptr @vf_empty], !type !12 + +define i1 @vf(ptr %this) #0 !dbg !7 { + ret i1 true +} + +; This should NOT be devirtualized: in non-LTO mode, empty functions are skipped.
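+; For the call sites below that ARE devirtualized, speculative devirtualization keeps the original indirect call as a fallback rather than replacing it outright. A minimal sketch of the guarded shape (value names illustrative, not necessarily what the pass emits): +; %cond = icmp eq ptr %fptr, @vf +; br i1 %cond, label %if.true.direct_targ, label %if.false.orig_indirect +; The if.true.direct_targ/if.false.orig_indirect labels in the CHECK lines below match the two arms of this guard.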
+define void @vf_empty(ptr %this) !dbg !11 { + ret void +} + +; CHECK: define void @call +define void @call(ptr %obj) #1 !dbg !5 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.public.type.test(ptr %vtable, metadata !"typeid") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable + ; CHECK: if.true.direct_targ: + ; CHECK: call i1 @vf( + ; CHECK: if.false.orig_indirect: + ; CHECK: call i1 %fptr( + call i1 %fptr(ptr %obj), !dbg !6 + ret void +} + + +; CHECK: define void @call1 +define void @call1(ptr %obj) #1 !dbg !9 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable, align 8 + ; CHECK: call i1 %fptr + %1 = call i1 %fptr(ptr %obj), !dbg !10 + ret void +} +declare ptr @llvm.load.relative.i32(ptr, i32) + +@vt3 = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32) +], align 4, !type !15 + +; CHECK: define void @call2 +define void @call2(ptr %obj) #1 !dbg !13 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: if.true.direct_targ: + ; CHECK: call i1 @vf( + ; CHECK: if.false.orig_indirect: + ; CHECK: call i1 %fptr( + call i1 %fptr(ptr %obj), !dbg !14 + ret void +} + +@_ZTV1A.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [ + i32 0, ; offset to top + i32 0, ; rtti + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32) ; vfunc offset +] }, align 4, !type !18 + +; CHECK: define void @call3 +define void @call3(ptr %obj) #1 !dbg !16 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 8) + ; CHECK: if.true.direct_targ: + ; CHECK: call i1 @vf( + ; CHECK: if.false.orig_indirect: + ; CHECK: call i1 %fptr( + call i1 %fptr(ptr %obj), !dbg !17 + ret void +} + + +declare i1 @llvm.type.test(ptr, metadata) +declare i1 @llvm.public.type.test(ptr, metadata) +declare void @llvm.assume(i1) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.ident = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "devirt-single.cc", directory: ".") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{!"clang version 4.0.0 (trunk 278098)"} +!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!6 = !DILocation(line: 30, column: 32, scope: !5) +!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEv", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!8 = !{i32 0, !"typeid"} + +!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: !1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!10 = !DILocation(line: 35, 
column: 32, scope: !9) +!11 = distinct !DISubprogram(name: "vf_empty", linkageName: "_ZN3vt18vf_emptyEv", scope: !1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!12 = !{i32 0, !"typeid1"} + +!13 = distinct !DISubprogram(name: "call2", linkageName: "_Z5call2Pv", scope: !1, file: !1, line: 40, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!14 = !DILocation(line: 41, column: 32, scope: !13) +!15 = !{i32 0, !"typeid2"} + +!16 = distinct !DISubprogram(name: "call3", linkageName: "_Z5call3Pv", scope: !1, file: !1, line: 50, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!17 = !DILocation(line: 51, column: 32, scope: !16) +!18 = !{i32 0, !"typeid3"} + + + +; CHECK: 1 wholeprogramdevirt - Number of whole program devirtualization targets +; CHECK: 3 wholeprogramdevirt - Number of single implementation devirtualizations diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll index d8f5c91..8327e1c 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll @@ -11,6 +11,9 @@ ; Check wildcard ; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt -wholeprogramdevirt-skip=vf?i1 %s 2>&1 | FileCheck %s --check-prefix=SKIP +; Check that no stats are reported in speculative devirtualization mode as the virtual const prop is disabled. +; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively -stats %s 2>&1 | FileCheck %s --check-prefix=CHECK-SPECULATIVE-WPD + target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" @@ -225,3 +228,7 @@ declare ptr @llvm.load.relative.i32(ptr, i32) ; CHECK: 2 wholeprogramdevirt - Number of unique return value optimizations ; CHECK: 2 wholeprogramdevirt - Number of virtual constant propagations ; CHECK: 2 wholeprogramdevirt - Number of 1 bit virtual constant propagations + +; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of unique return value optimizations +; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of virtual constant propagations +; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of 1 bit virtual constant propagations diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s index 1ffe533..d1df304 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s @@ -1403,8 +1403,8 @@ vzeroupper # CHECK-NEXT: 1 8 0.50 * vpblendvb %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpblendw $11, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpblendw $11, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 4 4 2.00 vpclmulqdq $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 4 11 2.00 * vpclmulqdq $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 4 4 1.50 vpclmulqdq $11, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4 11 1.50 * vpclmulqdq $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpcmpeqb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpcmpeqb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpcmpeqd %xmm0, %xmm1, %xmm2 @@ -1415,8 +1415,8 @@ vzeroupper # CHECK-NEXT: 1 8 0.50 * vpcmpeqw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 8 6 3.00 vpcmpestri $1, %xmm0, %xmm2 # CHECK-NEXT: 12 13 3.00 * vpcmpestri $1, (%rax), %xmm2 
-# CHECK-NEXT: 7 6 3.00 vpcmpestrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 12 13 3.00 * vpcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: 7 7 3.00 vpcmpestrm $1, %xmm0, %xmm2 +# CHECK-NEXT: 12 14 3.00 * vpcmpestrm $1, (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 vpcmpgtb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpcmpgtb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpcmpgtd %xmm0, %xmm1, %xmm2 @@ -1427,8 +1427,8 @@ vzeroupper # CHECK-NEXT: 1 8 0.50 * vpcmpgtw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 4 2 2.00 vpcmpistri $1, %xmm0, %xmm2 # CHECK-NEXT: 4 9 2.00 * vpcmpistri $1, (%rax), %xmm2 -# CHECK-NEXT: 3 6 2.00 vpcmpistrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 4 13 2.00 * vpcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 3 7 2.00 vpcmpistrm $1, %xmm0, %xmm2 +# CHECK-NEXT: 4 14 2.00 * vpcmpistrm $1, (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vperm2f128 $1, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 10 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2 @@ -1749,7 +1749,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 1.33 1.33 1.33 16.50 16.50 16.50 16.50 - 205.25 393.58 268.08 158.08 208.50 208.50 65.00 119.67 119.67 119.67 107.00 107.00 107.00 19.00 19.00 +# CHECK-NEXT: 1.33 1.33 1.33 16.50 16.50 16.50 16.50 - 204.25 392.58 268.08 158.08 208.50 208.50 65.00 119.67 119.67 119.67 107.00 107.00 107.00 19.00 19.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -2126,8 +2126,8 @@ vzeroupper # CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpblendvb %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpblendw $11, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpblendw $11, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpclmulqdq $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - - - - - - - - - - vpclmulqdq $11, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpeqb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpeqb (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpeqd %xmm0, %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s index 6dc5bac..6c8fac4 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s @@ -560,14 +560,14 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpcmpgtw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 1.00 vperm2i128 $1, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 1.00 * vperm2i128 $1, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 2 5 1.00 vpermd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 2 12 2.00 * vpermd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 2 6 1.00 vpermpd $1, %ymm0, %ymm2 -# CHECK-NEXT: 3 13 2.00 * vpermpd 
$1, (%rax), %ymm2 -# CHECK-NEXT: 2 7 1.00 vpermps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 3 14 2.00 * vpermps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 2 6 1.00 vpermq $1, %ymm0, %ymm2 -# CHECK-NEXT: 2 12 2.00 * vpermq $1, (%rax), %ymm2 +# CHECK-NEXT: 1 4 1.00 vpermd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vpermd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vpermpd $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vpermpd $1, (%rax), %ymm2 +# CHECK-NEXT: 1 4 1.00 vpermps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vpermps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vpermq $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vpermq $1, (%rax), %ymm2 # CHECK-NEXT: 1 5 0.33 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 # CHECK-NEXT: 1 5 0.33 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: 1 5 0.33 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 @@ -789,7 +789,7 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 6.67 6.67 6.67 - - - - - 93.75 132.75 92.25 36.25 80.50 80.50 29.00 52.33 52.33 52.33 50.67 50.67 50.67 2.50 2.50 +# CHECK-NEXT: 6.67 6.67 6.67 - - - - - 93.75 128.75 92.25 36.25 80.50 80.50 29.00 52.33 52.33 52.33 50.67 50.67 50.67 2.50 2.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -894,13 +894,13 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vperm2i128 $1, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vperm2i128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermpd $1, %ymm0, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermpd $1, (%rax), %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermpd $1, (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermq $1, %ymm0, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermq $1, (%rax), %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermq $1, (%rax), %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s 
b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s index 72d7de3..14b8e5f 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s @@ -1207,7 +1207,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 3 1.00 vaddps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1 10 1.00 * vaddps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1 10 1.00 * vaddps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 0.50 valignd $1, %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1 2 0.50 valignd $1, %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 1 8 1.00 * valignd $1, (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1 8 1.00 * valignd $1, (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1 1 1.00 valignd $1, %zmm16, %zmm17, %zmm19 {%k1} @@ -1216,7 +1216,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 1 1.00 valignd $1, %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1 8 1.00 * valignd $1, (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1 8 1.00 * valignd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 0.50 valignq $1, %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1 2 0.50 valignq $1, %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 1 8 1.00 * valignq $1, (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1 8 1.00 * valignq $1, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1 1 1.00 valignq $1, %zmm16, %zmm17, %zmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s index 552b3e4..ead609e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s @@ -1948,7 +1948,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 3 0.50 vaddps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 10 0.50 * vaddps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 10 0.50 * vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1 4 0.50 valignd $1, %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 1 3 0.50 valignd $1, %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 1 1 0.50 valignd $1, %xmm16, %xmm17, %xmm19 {%k1} @@ -1957,7 +1957,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 1 0.50 valignd $1, %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1 4 0.50 valignd $1, %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 1.00 valignd $1, %ymm16, %ymm17, %ymm19 # CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax), %ymm17, %ymm19 # CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 1 1 0.50 valignd $1, %ymm16, %ymm17, %ymm19 {%k1} @@ -1966,7 +1966,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 1 0.50 valignd $1, %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1 4 0.50 valignq $1, %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 1 3 0.50 valignq $1, %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 1 8 0.50 * valignq $1, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 1 8 0.50 * valignq $1, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 1 1 0.50 valignq $1, %xmm16, %xmm17, %xmm19 {%k1} @@ -1975,7 +1975,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 1 0.50 valignq $1, %xmm16, %xmm17, %xmm19 
{%k1} {z} # CHECK-NEXT: 1 8 0.50 * valignq $1, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1 8 0.50 * valignq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1 4 0.50 valignq $1, %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 1.00 valignq $1, %ymm16, %ymm17, %ymm19 # CHECK-NEXT: 1 8 0.50 * valignq $1, (%rax), %ymm17, %ymm19 # CHECK-NEXT: 1 8 0.50 * valignq $1, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 1 1 0.50 valignq $1, %ymm16, %ymm17, %ymm19 {%k1} @@ -3614,7 +3614,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 10.67 10.67 10.67 - - - - - 208.00 1083.00 636.50 261.50 509.50 509.50 32.00 355.67 355.67 355.67 334.33 334.33 334.33 32.00 32.00 +# CHECK-NEXT: 10.67 10.67 10.67 - - - - - 208.00 1084.00 637.50 261.50 509.50 509.50 32.00 355.67 355.67 355.67 334.33 334.33 334.33 32.00 32.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -3663,7 +3663,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - valignd $1, %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignd $1, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - valignd $1, %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - valignd $1, %ymm16, %ymm17, %ymm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignd $1, (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignd $1, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - valignd $1, %ymm16, %ymm17, %ymm19 {%k1} @@ -3681,7 +3681,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - valignq $1, %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignq $1, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - valignq $1, %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - valignq $1, %ymm16, %ymm17, %ymm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignq $1, (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignq $1, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - valignq $1, %ymm16, %ymm17, %ymm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdq.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdq.s index 87ba060..d1f2a98 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdq.s +++ 
b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdq.s @@ -13,8 +13,8 @@ vpclmulqdq $11, (%rax), %zmm17, %zmm19 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 4 4 2.00 vpclmulqdq $11, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 4 11 2.00 * vpclmulqdq $11, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 4 4 1.50 vpclmulqdq $11, %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 4 11 1.50 * vpclmulqdq $11, (%rax), %zmm17, %zmm19 # CHECK: Resources: # CHECK-NEXT: [0] - Zn4AGU0 @@ -43,9 +43,9 @@ vpclmulqdq $11, (%rax), %zmm17, %zmm19 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - +# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: -# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpclmulqdq $11, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - - - - - - - - - - vpclmulqdq $11, %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdqvl.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdqvl.s index 3c80c56..ea7a280 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdqvl.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdqvl.s @@ -16,10 +16,10 @@ vpclmulqdq $11, (%rax), %ymm17, %ymm19 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 4 4 2.00 vpclmulqdq $11, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 4 11 2.00 * vpclmulqdq $11, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 4 4 2.00 vpclmulqdq $11, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 4 11 2.00 * vpclmulqdq $11, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 4 4 1.50 vpclmulqdq $11, %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 4 11 1.50 * vpclmulqdq $11, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 4 4 1.50 vpclmulqdq $11, %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 4 11 1.50 * vpclmulqdq $11, (%rax), %ymm17, %ymm19 # CHECK: Resources: # CHECK-NEXT: [0] - Zn4AGU0 @@ -48,11 +48,11 @@ vpclmulqdq $11, (%rax), %ymm17, %ymm19 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - - - - - - - - 8.00 8.00 - - 1.00 1.00 - 0.67 0.67 0.67 0.67 0.67 0.67 - - +# CHECK-NEXT: - - - - - - - - 6.00 6.00 - - 1.00 1.00 - 0.67 0.67 0.67 0.67 0.67 0.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: -# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpclmulqdq $11, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - 
- vpclmulqdq $11, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - - - - - - - - - - vpclmulqdq $11, %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - - - - - - - - - - vpclmulqdq $11, %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %ymm17, %ymm19 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-bmi1.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-bmi1.s index f4888cf..afbd566 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-bmi1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-bmi1.s @@ -69,12 +69,12 @@ tzcnt (%rax), %rcx # CHECK-NEXT: 2 5 0.33 * blsrl (%rax), %ecx # CHECK-NEXT: 1 1 0.25 blsrq %rax, %rcx # CHECK-NEXT: 2 5 0.33 * blsrq (%rax), %rcx -# CHECK-NEXT: 2 2 1.00 tzcntw %ax, %cx -# CHECK-NEXT: 2 6 0.50 * tzcntw (%rax), %cx -# CHECK-NEXT: 2 2 0.50 tzcntl %eax, %ecx -# CHECK-NEXT: 2 6 0.50 * tzcntl (%rax), %ecx -# CHECK-NEXT: 2 2 0.50 tzcntq %rax, %rcx -# CHECK-NEXT: 2 6 0.50 * tzcntq (%rax), %rcx +# CHECK-NEXT: 1 1 0.25 tzcntw %ax, %cx +# CHECK-NEXT: 1 5 0.50 * tzcntw (%rax), %cx +# CHECK-NEXT: 1 1 0.50 tzcntl %eax, %ecx +# CHECK-NEXT: 1 5 0.50 * tzcntl (%rax), %ecx +# CHECK-NEXT: 1 1 0.50 tzcntq %rax, %rcx +# CHECK-NEXT: 1 5 0.50 * tzcntq (%rax), %rcx # CHECK: Resources: # CHECK-NEXT: [0] - Zn4AGU0 @@ -103,7 +103,7 @@ tzcnt (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 4.33 4.33 4.33 5.00 9.50 9.50 5.00 - - - - - - - - 4.33 4.33 4.33 4.33 4.33 4.33 - - +# CHECK-NEXT: 4.33 4.33 4.33 4.25 8.75 8.75 4.25 - - - - - - - - 4.33 4.33 4.33 4.33 4.33 4.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -127,7 +127,7 @@ tzcnt (%rax), %rcx # CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - blsrl (%rax), %ecx # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - blsrq %rax, %rcx # CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - blsrq (%rax), %rcx -# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - tzcntw %ax, %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - tzcntw %ax, %cx # CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - tzcntw (%rax), %cx # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - tzcntl %eax, %ecx # CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - tzcntl (%rax), %ecx diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-cmpxchg.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-cmpxchg.s index 64feeaf..26a42fd 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-cmpxchg.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-cmpxchg.s @@ -15,10 +15,10 @@ lock cmpxchg16b (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 19 3 6.00 * * 
cmpxchg8b (%rax) -# CHECK-NEXT: 28 4 14.75 * * cmpxchg16b (%rax) -# CHECK-NEXT: 19 3 6.00 * * lock cmpxchg8b (%rax) -# CHECK-NEXT: 28 4 14.75 * * lock cmpxchg16b (%rax) +# CHECK-NEXT: 15 3 5.00 * * cmpxchg8b (%rax) +# CHECK-NEXT: 26 2 10.00 * * cmpxchg16b (%rax) +# CHECK-NEXT: 15 3 5.00 * * lock cmpxchg8b (%rax) +# CHECK-NEXT: 26 2 10.00 * * lock cmpxchg16b (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - Zn4AGU0 @@ -47,11 +47,11 @@ lock cmpxchg16b (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - - - 41.50 41.50 41.50 41.50 - - - - - - - - - - - - - - - - +# CHECK-NEXT: - - - 30.00 30.00 30.00 30.00 - - - - - - - - - - - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: -# CHECK-NEXT: - - - 6.00 6.00 6.00 6.00 - - - - - - - - - - - - - - - - cmpxchg8b (%rax) -# CHECK-NEXT: - - - 14.75 14.75 14.75 14.75 - - - - - - - - - - - - - - - - cmpxchg16b (%rax) -# CHECK-NEXT: - - - 6.00 6.00 6.00 6.00 - - - - - - - - - - - - - - - - lock cmpxchg8b (%rax) -# CHECK-NEXT: - - - 14.75 14.75 14.75 14.75 - - - - - - - - - - - - - - - - lock cmpxchg16b (%rax) +# CHECK-NEXT: - - - 5.00 5.00 5.00 5.00 - - - - - - - - - - - - - - - - cmpxchg8b (%rax) +# CHECK-NEXT: - - - 10.00 10.00 10.00 10.00 - - - - - - - - - - - - - - - - cmpxchg16b (%rax) +# CHECK-NEXT: - - - 5.00 5.00 5.00 5.00 - - - - - - - - - - - - - - - - lock cmpxchg8b (%rax) +# CHECK-NEXT: - - - 10.00 10.00 10.00 10.00 - - - - - - - - - - - - - - - - lock cmpxchg16b (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-pclmul.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-pclmul.s index a36fb2aa..fc2bc8e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-pclmul.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-pclmul.s @@ -13,8 +13,8 @@ pclmulqdq $11, (%rax), %xmm2 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 4 4 2.00 pclmulqdq $11, %xmm0, %xmm2 -# CHECK-NEXT: 4 11 2.00 * pclmulqdq $11, (%rax), %xmm2 +# CHECK-NEXT: 4 4 1.50 pclmulqdq $11, %xmm0, %xmm2 +# CHECK-NEXT: 4 11 1.50 * pclmulqdq $11, (%rax), %xmm2 # CHECK: Resources: # CHECK-NEXT: [0] - Zn4AGU0 @@ -43,9 +43,9 @@ pclmulqdq $11, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - +# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: -# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - pclmulqdq $11, %xmm0, %xmm2 -# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pclmulqdq $11, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - - - - - - - - - - pclmulqdq $11, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pclmulqdq $11, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse42.s 
b/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse42.s index 015d37e..ae60835 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse42.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse42.s @@ -52,12 +52,12 @@ pcmpgtq (%rax), %xmm2 # CHECK-NEXT: 1 7 1.00 * crc32q (%rax), %rcx # CHECK-NEXT: 8 6 3.00 pcmpestri $1, %xmm0, %xmm2 # CHECK-NEXT: 12 13 3.00 * pcmpestri $1, (%rax), %xmm2 -# CHECK-NEXT: 7 6 3.00 pcmpestrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 12 13 3.00 * pcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: 7 7 3.00 pcmpestrm $1, %xmm0, %xmm2 +# CHECK-NEXT: 12 14 3.00 * pcmpestrm $1, (%rax), %xmm2 # CHECK-NEXT: 4 2 2.00 pcmpistri $1, %xmm0, %xmm2 # CHECK-NEXT: 4 9 2.00 * pcmpistri $1, (%rax), %xmm2 -# CHECK-NEXT: 3 6 2.00 pcmpistrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 4 13 2.00 * pcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 3 7 2.00 pcmpistrm $1, %xmm0, %xmm2 +# CHECK-NEXT: 4 14 2.00 * pcmpistrm $1, (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 pcmpgtq %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pcmpgtq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-vpclmulqdq.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-vpclmulqdq.s index 55a36d0..dca4703 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-vpclmulqdq.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-vpclmulqdq.s @@ -13,8 +13,8 @@ vpclmulqdq $11, (%rax), %ymm1, %ymm3 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 4 4 2.00 vpclmulqdq $11, %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 4 11 2.00 * vpclmulqdq $11, (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 4 4 1.50 vpclmulqdq $11, %ymm0, %ymm1, %ymm3 +# CHECK-NEXT: 4 11 1.50 * vpclmulqdq $11, (%rax), %ymm1, %ymm3 # CHECK: Resources: # CHECK-NEXT: [0] - Zn4AGU0 @@ -43,9 +43,9 @@ vpclmulqdq $11, (%rax), %ymm1, %ymm3 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - +# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: -# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpclmulqdq $11, %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %ymm1, %ymm3 +# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - - - - - - - - - - vpclmulqdq $11, %ymm0, %ymm1, %ymm3 +# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %ymm1, %ymm3 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-x86_64.s index 9c5b4e4..886d9c6 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-x86_64.s @@ -1173,18 +1173,18 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 6 0.67 * * andq %rsi, (%rax) # CHECK-NEXT: 1 6 0.67 * * lock andq %rsi, (%rax) # CHECK-NEXT: 1 5 0.33 * andq (%rax), %rdi -# CHECK-NEXT: 6 1 1.00 bsfw %si, %di -# CHECK-NEXT: 6 1 1.00 bsrw %si, %di -# CHECK-NEXT: 7 5 1.00 * bsfw (%rax), %di -# CHECK-NEXT: 7 5 1.00 * bsrw (%rax), %di -# CHECK-NEXT: 6 1 1.00 bsfl %esi, %edi -# CHECK-NEXT: 6 1 1.00 bsrl %esi, %edi -# CHECK-NEXT: 7 5 1.00 * bsfl 
(%rax), %edi -# CHECK-NEXT: 7 5 1.00 * bsrl (%rax), %edi -# CHECK-NEXT: 6 1 1.00 bsfq %rsi, %rdi -# CHECK-NEXT: 6 1 1.00 bsrq %rsi, %rdi -# CHECK-NEXT: 7 5 1.00 * bsfq (%rax), %rdi -# CHECK-NEXT: 7 5 1.00 * bsrq (%rax), %rdi +# CHECK-NEXT: 1 1 1.00 bsfw %si, %di +# CHECK-NEXT: 1 1 1.00 bsrw %si, %di +# CHECK-NEXT: 2 5 1.00 * bsfw (%rax), %di +# CHECK-NEXT: 2 5 1.00 * bsrw (%rax), %di +# CHECK-NEXT: 1 1 1.00 bsfl %esi, %edi +# CHECK-NEXT: 1 1 1.00 bsrl %esi, %edi +# CHECK-NEXT: 2 5 1.00 * bsfl (%rax), %edi +# CHECK-NEXT: 2 5 1.00 * bsrl (%rax), %edi +# CHECK-NEXT: 1 1 1.00 bsfq %rsi, %rdi +# CHECK-NEXT: 1 1 1.00 bsrq %rsi, %rdi +# CHECK-NEXT: 2 5 1.00 * bsfq (%rax), %rdi +# CHECK-NEXT: 2 5 1.00 * bsrq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 bswapl %eax # CHECK-NEXT: 1 1 0.25 bswapq %rax # CHECK-NEXT: 1 1 0.50 btw %si, %di @@ -1321,23 +1321,23 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 decq %rdi # CHECK-NEXT: 1 6 0.67 * * decq (%rax) # CHECK-NEXT: 1 6 0.67 * * lock decq (%rax) -# CHECK-NEXT: 2 10 10.00 U divb %dil -# CHECK-NEXT: 2 14 10.00 * U divb (%rax) -# CHECK-NEXT: 2 11 11.00 U divw %si -# CHECK-NEXT: 2 15 11.00 * U divw (%rax) -# CHECK-NEXT: 2 13 13.00 U divl %edx -# CHECK-NEXT: 2 17 13.00 * U divl (%rax) -# CHECK-NEXT: 2 17 17.00 U divq %rcx -# CHECK-NEXT: 2 21 17.00 * U divq (%rax) +# CHECK-NEXT: 2 9 9.00 U divb %dil +# CHECK-NEXT: 2 13 9.00 * U divb (%rax) +# CHECK-NEXT: 2 10 10.00 U divw %si +# CHECK-NEXT: 2 14 10.00 * U divw (%rax) +# CHECK-NEXT: 2 12 12.00 U divl %edx +# CHECK-NEXT: 2 16 12.00 * U divl (%rax) +# CHECK-NEXT: 2 18 18.00 U divq %rcx +# CHECK-NEXT: 2 22 18.00 * U divq (%rax) # CHECK-NEXT: 100 100 25.00 U enter $7, $4095 -# CHECK-NEXT: 2 10 10.00 U idivb %dil -# CHECK-NEXT: 2 14 10.00 * U idivb (%rax) -# CHECK-NEXT: 2 11 11.00 U idivw %si -# CHECK-NEXT: 2 15 11.00 * U idivw (%rax) -# CHECK-NEXT: 2 13 13.00 U idivl %edx -# CHECK-NEXT: 2 17 13.00 * U idivl (%rax) -# CHECK-NEXT: 2 17 17.00 U idivq %rcx -# CHECK-NEXT: 2 21 17.00 * U idivq (%rax) +# CHECK-NEXT: 2 9 9.00 U idivb %dil +# CHECK-NEXT: 2 13 9.00 * U idivb (%rax) +# CHECK-NEXT: 2 10 10.00 U idivw %si +# CHECK-NEXT: 2 14 10.00 * U idivw (%rax) +# CHECK-NEXT: 2 12 12.00 U idivl %edx +# CHECK-NEXT: 2 16 12.00 * U idivl (%rax) +# CHECK-NEXT: 2 18 18.00 U idivq %rcx +# CHECK-NEXT: 2 22 18.00 * U idivq (%rax) # CHECK-NEXT: 1 3 3.00 imulb %dil # CHECK-NEXT: 1 7 3.00 * imulb (%rax) # CHECK-NEXT: 3 3 3.00 imulw %di @@ -1891,12 +1891,12 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 5 0.67 * * xaddq %rax, (%rbx) # CHECK-NEXT: 1 5 0.67 * * lock xaddq %rax, (%rbx) # CHECK-NEXT: 2 1 0.50 xchgb %bl, %cl -# CHECK-NEXT: 5 7 0.50 * * xchgb %bl, (%rbx) -# CHECK-NEXT: 5 7 0.50 * * lock xchgb %bl, (%rbx) +# CHECK-NEXT: 2 7 0.50 * * xchgb %bl, (%rbx) +# CHECK-NEXT: 2 7 0.50 * * lock xchgb %bl, (%rbx) # CHECK-NEXT: 2 1 0.50 xchgw %bx, %ax # CHECK-NEXT: 2 1 0.50 xchgw %bx, %cx -# CHECK-NEXT: 5 7 0.50 * * xchgw %ax, (%rbx) -# CHECK-NEXT: 5 7 0.50 * * lock xchgw %ax, (%rbx) +# CHECK-NEXT: 2 7 0.50 * * xchgw %ax, (%rbx) +# CHECK-NEXT: 2 7 0.50 * * lock xchgw %ax, (%rbx) # CHECK-NEXT: 2 0 0.33 xchgl %ebx, %eax # CHECK-NEXT: 2 0 0.33 xchgl %ebx, %ecx # CHECK-NEXT: 2 6 0.50 * * xchgl %eax, (%rbx) @@ -1975,7 +1975,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 259.00 259.00 259.00 1733.00 1865.50 1775.50 1529.50 1.50 - - - - - - - 259.00 259.00 259.00 151.67 151.67 151.67 161.00 161.00 
+# CHECK-NEXT: 259.00 259.00 259.00 1725.00 1865.50 1775.50 1529.50 1.50 - - - - - - - 259.00 259.00 259.00 151.67 151.67 151.67 161.00 161.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -2266,23 +2266,23 @@ xorq (%rax), %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - decq %rdi # CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 decq (%rax) # CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 lock decq (%rax) -# CHECK-NEXT: - - - 10.00 - - - - - - - - - - - - - - - - - - - divb %dil -# CHECK-NEXT: 0.33 0.33 0.33 10.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divb (%rax) -# CHECK-NEXT: - - - 11.00 - - - - - - - - - - - - - - - - - - - divw %si -# CHECK-NEXT: 0.33 0.33 0.33 11.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divw (%rax) -# CHECK-NEXT: - - - 13.00 - - - - - - - - - - - - - - - - - - - divl %edx -# CHECK-NEXT: 0.33 0.33 0.33 13.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divl (%rax) -# CHECK-NEXT: - - - 17.00 - - - - - - - - - - - - - - - - - - - divq %rcx -# CHECK-NEXT: 0.33 0.33 0.33 17.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divq (%rax) +# CHECK-NEXT: - - - 9.00 - - - - - - - - - - - - - - - - - - - divb %dil +# CHECK-NEXT: 0.33 0.33 0.33 9.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divb (%rax) +# CHECK-NEXT: - - - 10.00 - - - - - - - - - - - - - - - - - - - divw %si +# CHECK-NEXT: 0.33 0.33 0.33 10.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divw (%rax) +# CHECK-NEXT: - - - 12.00 - - - - - - - - - - - - - - - - - - - divl %edx +# CHECK-NEXT: 0.33 0.33 0.33 12.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divl (%rax) +# CHECK-NEXT: - - - 18.00 - - - - - - - - - - - - - - - - - - - divq %rcx +# CHECK-NEXT: 0.33 0.33 0.33 18.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divq (%rax) # CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - enter $7, $4095 -# CHECK-NEXT: - - - 10.00 - - - - - - - - - - - - - - - - - - - idivb %dil -# CHECK-NEXT: 0.33 0.33 0.33 10.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivb (%rax) -# CHECK-NEXT: - - - 11.00 - - - - - - - - - - - - - - - - - - - idivw %si -# CHECK-NEXT: 0.33 0.33 0.33 11.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivw (%rax) -# CHECK-NEXT: - - - 13.00 - - - - - - - - - - - - - - - - - - - idivl %edx -# CHECK-NEXT: 0.33 0.33 0.33 13.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivl (%rax) -# CHECK-NEXT: - - - 17.00 - - - - - - - - - - - - - - - - - - - idivq %rcx -# CHECK-NEXT: 0.33 0.33 0.33 17.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivq (%rax) +# CHECK-NEXT: - - - 9.00 - - - - - - - - - - - - - - - - - - - idivb %dil +# CHECK-NEXT: 0.33 0.33 0.33 9.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivb (%rax) +# CHECK-NEXT: - - - 10.00 - - - - - - - - - - - - - - - - - - - idivw %si +# CHECK-NEXT: 0.33 0.33 0.33 10.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivw (%rax) +# CHECK-NEXT: - - - 12.00 - - - - - - - - - - - - - - - - - - - idivl %edx +# CHECK-NEXT: 0.33 0.33 0.33 12.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivl (%rax) +# CHECK-NEXT: - - - 18.00 - - - - - - - - - - - - - - - - - - - 
idivq %rcx +# CHECK-NEXT: 0.33 0.33 0.33 18.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivq (%rax) # CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - imulb %dil # CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imulb (%rax) # CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - imulw %di |