Diffstat (limited to 'llvm/test/Transforms/InstCombine')
-rw-r--r-- llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll | 18
-rw-r--r-- llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dup.ll | 39
-rw-r--r-- llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll | 60
-rw-r--r-- llvm/test/Transforms/InstCombine/AArch64/tbl.ll | 261
-rw-r--r-- llvm/test/Transforms/InstCombine/AArch64/tbl1.ll | 65
-rw-r--r-- llvm/test/Transforms/InstCombine/ARM/tbl.ll | 215
-rw-r--r-- llvm/test/Transforms/InstCombine/ARM/tbl1.ll | 35
-rw-r--r-- llvm/test/Transforms/InstCombine/binop-phi-operands.ll | 5
-rw-r--r-- llvm/test/Transforms/InstCombine/binop-select.ll | 170
-rw-r--r-- llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll | 3
-rw-r--r-- llvm/test/Transforms/InstCombine/cast-mul-select.ll | 50
-rw-r--r-- llvm/test/Transforms/InstCombine/cast.ll | 144
-rw-r--r-- llvm/test/Transforms/InstCombine/catchswitch-phi.ll | 10
-rw-r--r-- llvm/test/Transforms/InstCombine/debuginfo-dce.ll | 8
-rw-r--r-- llvm/test/Transforms/InstCombine/dont-distribute-phi.ll | 10
-rw-r--r-- llvm/test/Transforms/InstCombine/float-shrink-compare.ll | 30
-rw-r--r-- llvm/test/Transforms/InstCombine/fmul.ll | 4
-rw-r--r-- llvm/test/Transforms/InstCombine/free-inversion.ll | 4
-rw-r--r-- llvm/test/Transforms/InstCombine/get_vector_length.ll | 89
-rw-r--r-- llvm/test/Transforms/InstCombine/icmp-add.ll | 76
-rw-r--r-- llvm/test/Transforms/InstCombine/icmp-mul-zext.ll | 7
-rw-r--r-- llvm/test/Transforms/InstCombine/icmp-select.ll | 117
-rw-r--r-- llvm/test/Transforms/InstCombine/icmp-shr.ll | 50
-rw-r--r-- llvm/test/Transforms/InstCombine/icmp-trunc.ll | 72
-rw-r--r-- llvm/test/Transforms/InstCombine/known-bits-lerp-pattern.ll | 180
-rw-r--r-- llvm/test/Transforms/InstCombine/known-bits.ll | 17
-rw-r--r-- llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll | 14
-rw-r--r-- llvm/test/Transforms/InstCombine/logical-select.ll | 14
-rw-r--r-- llvm/test/Transforms/InstCombine/modular-format.ll | 105
-rw-r--r-- llvm/test/Transforms/InstCombine/not.ll | 78
-rw-r--r-- llvm/test/Transforms/InstCombine/or-select-zero-icmp.ll | 169
-rw-r--r-- llvm/test/Transforms/InstCombine/ptrauth-intrinsics.ll | 37
-rw-r--r-- llvm/test/Transforms/InstCombine/ptrtoaddr.ll | 4
-rw-r--r-- llvm/test/Transforms/InstCombine/recurrence.ll | 4
-rw-r--r-- llvm/test/Transforms/InstCombine/saturating-add-sub.ll | 336
-rw-r--r-- llvm/test/Transforms/InstCombine/scalarization.ll | 44
-rw-r--r-- llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll | 2
-rw-r--r-- llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll | 6
-rw-r--r-- llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll | 13
-rw-r--r-- llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll | 123
-rw-r--r-- llvm/test/Transforms/InstCombine/sub-gep.ll | 3
41 files changed, 2450 insertions, 241 deletions
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
index ddcaeaf..b5420e9 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
@@ -848,8 +848,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1>, <
define <vscale x 4 x i32> @simplify_sqrshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sqrshl_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -859,8 +858,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @simplify_sqshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sqshl_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -902,8 +900,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @simplify_srshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_srshl_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -1105,8 +1102,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1>, <
define <vscale x 4 x i32> @simplify_uqrshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uqrshl_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -1116,8 +1112,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @simplify_uqshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uqshl_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -1170,8 +1165,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @simplify_urshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_urshl_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
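Background for the checks above: the unsuffixed sve.*shl intrinsics have merging semantics, so each lane where the governing predicate is false keeps the value of the first data operand. With a zeroinitializer predicate no lane is active, so a call such as

  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)

returns %a unchanged, which is exactly the fold the updated CHECK lines assert.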
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dup.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dup.ll
index 41e7afc..0213d05 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dup.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dup.ll
@@ -55,15 +55,40 @@ define <vscale x 8 x i16> @dupx_splat_convert(i16 %s) #0 {
ret <vscale x 8 x i16> %splat
}
-declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
+define <vscale x 16 x i8> @dup_all_active_i8(<vscale x 16 x i8> %v, i8 %s) #0 {
+; CHECK-LABEL: @dup_all_active_i8(
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[S:%.*]], i64 0
+; CHECK-NEXT: [[INSERT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 16 x i8> [[INSERT]]
+;
+ %insert = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> %v, <vscale x 16 x i1> splat(i1 true), i8 %s)
+ ret <vscale x 16 x i8> %insert
+}
-declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
+define <vscale x 4 x i32> @dup_all_active_i32(<vscale x 4 x i32> %v) #0 {
+; CHECK-LABEL: @dup_all_active_i32(
+; CHECK-NEXT: ret <vscale x 4 x i32> splat (i32 73)
+;
+ %insert = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i1> splat(i1 true), i32 73)
+ ret <vscale x 4 x i32> %insert
+}
-declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
-declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+define <vscale x 4 x float> @dup_all_active_f32(<vscale x 4 x float> %v, float %s) #0 {
+; CHECK-LABEL: @dup_all_active_f32(
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[S:%.*]], i64 0
+; CHECK-NEXT: [[INSERT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 4 x float> [[INSERT]]
+;
+ %insert = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> %v, <vscale x 4 x i1> splat(i1 true), float %s)
+ ret <vscale x 4 x float> %insert
+}
-declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
-declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+define <vscale x 2 x double> @dup_all_active_f64(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: @dup_all_active_f64(
+; CHECK-NEXT: ret <vscale x 2 x double> splat (double 1.000000e+00)
+;
+ %insert = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> %v, <vscale x 2 x i1> splat(i1 true), double 1.0)
+ ret <vscale x 2 x double> %insert
+}
attributes #0 = { "target-features"="+sve" }
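The added dup tests rely on the intrinsic's behavior under an all-active predicate: every lane receives the scalar, so the passthrough vector %v is dead and the call is a plain splat. For a variable scalar the splat is emitted as the usual insertelement plus shufflevector idiom; for a constant scalar it folds all the way to a splat constant such as `splat (i32 73)` or `splat (double 1.000000e+00)`.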
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
index 8072b3f..96ac0ef 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
@@ -357,6 +357,26 @@ define <vscale x 4 x i32> @replace_smulh_intrinsic_i32(<vscale x 4 x i32> %a, <v
ret <vscale x 4 x i32> %r
}
+define <vscale x 4 x i32> @replace_sqrshl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqrshl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @replace_sqshl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqshl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32
@@ -368,6 +388,16 @@ define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <v
ret <vscale x 4 x i32> %r
}
+define <vscale x 4 x i32> @replace_srshl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_srshl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
define <vscale x 4 x i32> @replace_sub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_sub_intrinsic_i32
@@ -434,6 +464,26 @@ define <vscale x 4 x i32> @replace_umulh_intrinsic_i32(<vscale x 4 x i32> %a, <v
ret <vscale x 4 x i32> %r
}
+define <vscale x 4 x i32> @replace_uqrshl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqrshl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @replace_uqshl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqshl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32
@@ -445,4 +495,14 @@ define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <v
ret <vscale x 4 x i32> %r
}
+define <vscale x 4 x i32> @replace_urshl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_urshl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
attributes #0 = { "target-features"="+sve,+sve2" }
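The new *rshl/*qshl tests follow the same pattern as the existing cases in this file: with a `splat (i1 true)` predicate there are no inactive lanes whose merged values must be preserved, so the merging intrinsic can be rewritten to its `.u` counterpart (for example, `llvm.aarch64.sve.urshl` becomes `llvm.aarch64.sve.urshl.u`). The `.u` forms leave inactive lanes undefined, which gives later combines more freedom.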
diff --git a/llvm/test/Transforms/InstCombine/AArch64/tbl.ll b/llvm/test/Transforms/InstCombine/AArch64/tbl.ll
new file mode 100644
index 0000000..8a9ca6c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/tbl.ll
@@ -0,0 +1,261 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; We can turn a tbl/tbx intrinsic into a shufflevector instruction if the mask
+; is constant and references 2 or fewer operands.
+
+; Basic tbl1 with all in-bounds indices should optimize to shufflevector.
+define <16 x i8> @tbl1_basic(<16 x i8> %a) {
+; CHECK-LABEL: @tbl1_basic(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <16 x i8> %tbl
+}
+
+; tbl2 with both operands the same should optimize (1 unique source).
+define <16 x i8> @tbl2_duplicate_operands(<16 x i8> %a) {
+; CHECK-LABEL: @tbl2_duplicate_operands(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>)
+ ret <16 x i8> %tbl
+}
+
+; tbl3 referencing 2 unique operands should optimize.
+define <16 x i8> @tbl3_two_sources(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @tbl3_two_sources(
+; CHECK-NEXT: [[TBL:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 0, i32 1, i32 2, i32 3, i32 0, i32 0, i32 0, i32 0>
+; CHECK-NEXT: ret <16 x i8> [[TBL]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 32, i8 33, i8 34, i8 35, i8 0, i8 0, i8 0, i8 0>)
+ ret <16 x i8> %tbl
+}
+
+; tbl4 with alternating duplicate operands should optimize (2 unique sources).
+define <16 x i8> @tbl4_duplicate_operands(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @tbl4_duplicate_operands(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 32, i8 33, i8 34, i8 35, i8 48, i8 49, i8 50, i8 51>)
+ ret <16 x i8> %tbl
+}
+
+; tbl4 where mask only references first two operands should optimize.
+define <16 x i8> @tbl4_unused_operands(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: @tbl4_unused_operands(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>)
+ ret <16 x i8> %tbl
+}
+
+; tbl4 where mask only references one operand should optimize.
+define <16 x i8> @tbl4_single_operand_used(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: @tbl4_single_operand_used(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <16 x i8> %tbl
+}
+
+; tbl1 with some OOB indices should optimize (1 source + zero vector = 2 sources).
+define <16 x i8> @tbl1_with_oob(<16 x i8> %a) {
+; CHECK-LABEL: @tbl1_with_oob(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>)
+ ret <16 x i8> %tbl
+}
+
+; tbl2 with duplicate operands and OOB should optimize (1 unique source + zero vector = 2 sources).
+define <16 x i8> @tbl2_duplicate_with_oob(<16 x i8> %a) {
+; CHECK-LABEL: @tbl2_duplicate_with_oob(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99>)
+ ret <16 x i8> %tbl
+}
+
+; tbl2 with OOB indices should NOT optimize (2 sources + zero vector = 3 sources).
+define <16 x i8> @tbl2_with_oob_bail(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @tbl2_with_oob_bail(
+; CHECK-NEXT: [[TBL:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99>)
+; CHECK-NEXT: ret <16 x i8> [[TBL]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99>)
+ ret <16 x i8> %tbl
+}
+
+; tbl1 with all OOB indices should optimize to zero vector.
+define <16 x i8> @tbl1_all_oob(<16 x i8> %a) {
+; CHECK-LABEL: @tbl1_all_oob(
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> <i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99>)
+ ret <16 x i8> %tbl
+}
+
+; tbl3 referencing all 3 operands should NOT optimize.
+define <16 x i8> @tbl3_three_sources_bail(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: @tbl3_three_sources_bail(
+; CHECK-NEXT: [[TBL:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 32, i8 33, i8 34, i8 35, i8 0, i8 0, i8 0, i8 0>)
+; CHECK-NEXT: ret <16 x i8> [[TBL]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 32, i8 33, i8 34, i8 35, i8 0, i8 0, i8 0, i8 0>)
+ ret <16 x i8> %tbl
+}
+
+; tbl4 referencing 3 unique operands should NOT optimize.
+define <16 x i8> @tbl4_three_sources_bail(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: @tbl4_three_sources_bail(
+; CHECK-NEXT: [[TBL:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[A]], <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 32, i8 33, i8 34, i8 35, i8 48, i8 49, i8 50, i8 51>)
+; CHECK-NEXT: ret <16 x i8> [[TBL]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 32, i8 33, i8 34, i8 35, i8 48, i8 49, i8 50, i8 51>)
+ ret <16 x i8> %tbl
+}
+
+; tbl4 referencing all 4 unique operands should NOT optimize.
+define <16 x i8> @tbl4_four_sources_bail(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: @tbl4_four_sources_bail(
+; CHECK-NEXT: [[TBL:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 32, i8 33, i8 34, i8 35, i8 48, i8 49, i8 50, i8 51>)
+; CHECK-NEXT: ret <16 x i8> [[TBL]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 32, i8 33, i8 34, i8 35, i8 48, i8 49, i8 50, i8 51>)
+ ret <16 x i8> %tbl
+}
+
+; tbx1 with no OOB should optimize.
+define <16 x i8> @tbx1_no_oob(<16 x i8> %fallback, <16 x i8> %a) {
+; CHECK-LABEL: @tbx1_no_oob(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %tbx = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %fallback, <16 x i8> %a, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <16 x i8> %tbx
+}
+
+; tbx2 where fallback == second source operand should optimize (deduplicated).
+define <16 x i8> @tbx2_fallback_equals_second_source(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @tbx2_fallback_equals_second_source(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %tbx = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %b, <16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99>)
+ ret <16 x i8> %tbx
+}
+
+; tbx1 with OOB where fallback == source should optimize (deduplicated).
+define <16 x i8> @tbx1_oob_fallback_same_as_source(<16 x i8> %a) {
+; CHECK-LABEL: @tbx1_oob_fallback_same_as_source(
+; CHECK-NEXT: [[A:%.*]] = shufflevector <16 x i8> [[A1:%.*]], <16 x i8> poison, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: ret <16 x i8> [[A]]
+;
+ %tbx = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99>)
+ ret <16 x i8> %tbx
+}
+
+; tbx2 with OOB should NOT optimize (2 sources + fallback = 3 sources).
+define <16 x i8> @tbx2_with_oob_bail(<16 x i8> %fallback, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @tbx2_with_oob_bail(
+; CHECK-NEXT: [[TBX:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[FALLBACK:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99>)
+; CHECK-NEXT: ret <16 x i8> [[TBX]]
+;
+ %tbx = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %fallback, <16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 16, i8 17, i8 18, i8 19, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99>)
+ ret <16 x i8> %tbx
+}
+
+; tbx1 with all OOB indices should optimize to fallback.
+define <16 x i8> @tbx1_all_oob(<16 x i8> %fallback, <16 x i8> %a) {
+; CHECK-LABEL: @tbx1_all_oob(
+; CHECK-NEXT: ret <16 x i8> [[FALLBACK:%.*]]
+;
+ %tbx = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %fallback, <16 x i8> %a, <16 x i8> <i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99>)
+ ret <16 x i8> %tbx
+}
+
+; tbx1 with OOB and mismatched fallback/source sizes should NOT optimize.
+define <8 x i8> @tbx1_fallback_size_mismatch(<8 x i8> %fallback, <16 x i8> %a) {
+; CHECK-LABEL: @tbx1_fallback_size_mismatch(
+; CHECK-NEXT: [[TBX:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[FALLBACK:%.*]], <16 x i8> [[A:%.*]], <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 99, i8 99, i8 99, i8 99>)
+; CHECK-NEXT: ret <8 x i8> [[TBX]]
+;
+ %tbx = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %fallback, <16 x i8> %a, <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 99, i8 99, i8 99, i8 99>)
+ ret <8 x i8> %tbx
+}
+
+; tbx1 with no OOB and mismatched fallback/source sizes should optimize.
+define <8 x i8> @tbx1_fallback_size_mismatch_no_oob(<8 x i8> %fallback, <16 x i8> %a) {
+; CHECK-LABEL: @tbx1_fallback_size_mismatch_no_oob(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i8> [[TMP1]]
+;
+ %tbx = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %fallback, <16 x i8> %a, <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <8 x i8> %tbx
+}
+
+; tbl1 with non-i8 element type should NOT optimize.
+define <8 x i16> @tbl1_8x16(<16 x i8> %vec) {
+; CHECK-LABEL: @tbl1_8x16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TBL1:%.*]] = call <8 x i16> @llvm.aarch64.neon.tbl1.v8i16(<16 x i8> [[VEC:%.*]], <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; CHECK-NEXT: ret <8 x i16> [[TBL1]]
+;
+entry:
+ ; `tbl1.v8i16` is not really a thing, but it's good to check.
+ %tbl1 = call <8 x i16> @llvm.aarch64.neon.tbl1.v8i16(<16 x i8> %vec, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+ ret <8 x i16> %tbl1
+}
+
+; tbl1 with non-8/16 element count should NOT optimize.
+define <12 x i8> @tbl1_16x8(<16 x i8> %vec) {
+; CHECK-LABEL: @tbl1_16x8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TBL1:%.*]] = call <12 x i8> @llvm.aarch64.neon.tbl1.v12i8(<16 x i8> [[VEC:%.*]], <12 x i8> <i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; CHECK-NEXT: ret <12 x i8> [[TBL1]]
+;
+entry:
+ ; `tbl1.v12i8` is not really a thing, but it's good to check.
+ %tbl1 = call <12 x i8> @llvm.aarch64.neon.tbl1.v12i8(<16 x i8> %vec, <12 x i8> <i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <12 x i8> %tbl1
+}
+
+; Non-constant mask should NOT optimize.
+define <16 x i8> @tbl1_non_constant_mask(<16 x i8> %a, <16 x i8> %mask) {
+; CHECK-LABEL: @tbl1_non_constant_mask(
+; CHECK-NEXT: [[TBL:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[MASK:%.*]])
+; CHECK-NEXT: ret <16 x i8> [[TBL]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %mask)
+ ret <16 x i8> %tbl
+}
+
+; Mask with some poison elements should optimize, with poison propagating to output.
+define <16 x i8> @tbl1_poison_mask_elements(<16 x i8> %a) {
+; CHECK-LABEL: @tbl1_poison_mask_elements(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 0, i32 poison, i32 2, i32 poison, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> <i8 0, i8 poison, i8 2, i8 poison, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %tbl
+}
+
+; Mask with all poison elements should optimize to poison.
+define <16 x i8> @tbl1_all_poison_mask(<16 x i8> %a) {
+; CHECK-LABEL: @tbl1_all_poison_mask(
+; CHECK-NEXT: ret <16 x i8> poison
+;
+ %tbl = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> poison)
+ ret <16 x i8> %tbl
+}
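For orientation, a sketch of the mapping these tests exercise; the values %t, %shuf, %x, and %y are hypothetical, not taken from the patch. A tbl call concatenates its table operands into one byte array, so with a constant mask, index i in [0, 16 * #operands) selects byte i of that array and corresponds one-to-one to a shufflevector index. Any out-of-range index yields zero, which the combine models by adding a zero vector as an extra shuffle source; the rewrite is therefore only possible when the unique table operands, plus that zero vector if any index is out of range, number at most two:

  %t = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> <i8 0, i8 16, i8 1, i8 17, i8 2, i8 18, i8 3, i8 19, i8 4, i8 20, i8 5, i8 21, i8 6, i8 22, i8 7, i8 23>)
  ; ...interleaves the low halves of %x and %y, i.e. it is equivalent to:
  %shuf = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>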
diff --git a/llvm/test/Transforms/InstCombine/AArch64/tbl1.ll b/llvm/test/Transforms/InstCombine/AArch64/tbl1.ll
deleted file mode 100644
index 362cc0f..0000000
--- a/llvm/test/Transforms/InstCombine/AArch64/tbl1.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S | FileCheck %s
-
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64"
-
-; Turning a table lookup intrinsic into a shuffle vector instruction
-; can be beneficial. If the mask used for the lookup is the constant
-; vector {7,6,5,4,3,2,1,0}, then the back-end generates rev64
-; instructions instead.
-
-define <8 x i8> @tbl1_8x8(<16 x i8> %vec) {
-; CHECK-LABEL: @tbl1_8x8(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[VEC:%.*]], <16 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: ret <8 x i8> [[TMP0]]
-;
-entry:
- %tbl1 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vec, <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
- ret <8 x i8> %tbl1
-}
-
-; Bail the optimization if a mask index is out of range.
-define <8 x i8> @tbl1_8x8_out_of_range(<16 x i8> %vec) {
-; CHECK-LABEL: @tbl1_8x8_out_of_range(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TBL1:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VEC:%.*]], <8 x i8> <i8 8, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; CHECK-NEXT: ret <8 x i8> [[TBL1]]
-;
-entry:
- %tbl1 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vec, <8 x i8> <i8 8, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
- ret <8 x i8> %tbl1
-}
-
-; Bail the optimization if the size of the return vector is not 8 elements.
-define <16 x i8> @tbl1_16x8(<16 x i8> %vec) {
-; CHECK-LABEL: @tbl1_16x8(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TBL1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[VEC:%.*]], <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; CHECK-NEXT: ret <16 x i8> [[TBL1]]
-;
-entry:
- %tbl1 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %vec, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
- ret <16 x i8> %tbl1
-}
-
-; Bail the optimization if the elements of the return vector are not of type i8.
-define <8 x i16> @tbl1_8x16(<16 x i8> %vec) {
-; CHECK-LABEL: @tbl1_8x16(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TBL1:%.*]] = call <8 x i16> @llvm.aarch64.neon.tbl1.v8i16(<16 x i8> [[VEC:%.*]], <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; CHECK-NEXT: ret <8 x i16> [[TBL1]]
-;
-entry:
- %tbl1 = call <8 x i16> @llvm.aarch64.neon.tbl1.v8i16(<16 x i8> %vec, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
- ret <8 x i16> %tbl1
-}
-
-; The type <8 x i16> is not a valid return type for this intrinsic,
-; but we want to test that the optimization won't trigger for vector
-; elements of type different than i8.
-declare <8 x i16> @llvm.aarch64.neon.tbl1.v8i16(<16 x i8>, <8 x i16>)
-
-declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>)
-declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>)
diff --git a/llvm/test/Transforms/InstCombine/ARM/tbl.ll b/llvm/test/Transforms/InstCombine/ARM/tbl.ll
new file mode 100644
index 0000000..d4d5ec2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ARM/tbl.ll
@@ -0,0 +1,215 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv8-arm-none-eabi"
+
+; We can turn a vtbl/vtbx intrinsic into a shufflevector instruction if the mask
+; is constant and references 2 or fewer operands.
+
+; Basic vtbl1 with all in-bounds indices should optimize to shufflevector.
+define <8 x i8> @vtbl1_basic(<8 x i8> %a) {
+; CHECK-LABEL: @vtbl1_basic(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i8> [[TMP1]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <8 x i8> %tbl
+}
+
+; vtbl2 with both operands the same should be optimized (1 unique source).
+define <8 x i8> @vtbl2_duplicate_operands(<8 x i8> %a) {
+; CHECK-LABEL: @vtbl2_duplicate_operands(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <8 x i8> [[TMP1]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %a, <8 x i8> %a, <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>)
+ ret <8 x i8> %tbl
+}
+
+; vtbl3 referencing 2 unique operands should optimize.
+define <8 x i8> @vtbl3_two_sources(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: @vtbl3_two_sources(
+; CHECK-NEXT: [[TBL:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 0, i32 1, i32 0, i32 0>
+; CHECK-NEXT: ret <8 x i8> [[TBL]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %a, <8 x i8> %b, <8 x i8> %a, <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 16, i8 17, i8 0, i8 0>)
+ ret <8 x i8> %tbl
+}
+
+; vtbl4 with alternating duplicate operands should optimize (2 unique sources).
+define <8 x i8> @vtbl4_duplicate_operands(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: @vtbl4_duplicate_operands(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 0, i32 1, i32 8, i32 9>
+; CHECK-NEXT: ret <8 x i8> [[TMP1]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %a, <8 x i8> %b, <8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 16, i8 17, i8 24, i8 25>)
+ ret <8 x i8> %tbl
+}
+
+; vtbl4 where mask only references first two operands should optimize.
+define <8 x i8> @vtbl4_unused_operands(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
+; CHECK-LABEL: @vtbl4_unused_operands(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: ret <8 x i8> [[TMP1]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>)
+ ret <8 x i8> %tbl
+}
+
+; vtbl4 where mask only references one operand should optimize.
+define <8 x i8> @vtbl4_single_operand_used(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
+; CHECK-LABEL: @vtbl4_single_operand_used(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i8> [[TMP1]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <8 x i8> %tbl
+}
+
+; vtbl1 with some OOB indices should optimize (1 source + zero vector = 2 sources).
+define <8 x i8> @vtbl1_with_oob(<8 x i8> %a) {
+; CHECK-LABEL: @vtbl1_with_oob(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT: ret <8 x i8> [[TMP1]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>)
+ ret <8 x i8> %tbl
+}
+
+; vtbl2 with duplicate operands and OOB should optimize (1 unique source + zero vector = 2 sources).
+define <8 x i8> @vtbl2_duplicate_with_oob(<8 x i8> %a) {
+; CHECK-LABEL: @vtbl2_duplicate_with_oob(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT: ret <8 x i8> [[TMP1]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %a, <8 x i8> %a, <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 99, i8 99, i8 99, i8 99>)
+ ret <8 x i8> %tbl
+}
+
+; vtbl2 with OOB indices should NOT optimize (2 sources + zero vector = 3 sources).
+define <8 x i8> @vtbl2_with_oob_bail(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: @vtbl2_with_oob_bail(
+; CHECK-NEXT: [[TBL:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]], <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 99, i8 99, i8 99, i8 99>)
+; CHECK-NEXT: ret <8 x i8> [[TBL]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 99, i8 99, i8 99, i8 99>)
+ ret <8 x i8> %tbl
+}
+
+; vtbl1 with all OOB indices should optimize to zero vector.
+define <8 x i8> @vtbl1_all_oob(<8 x i8> %a) {
+; CHECK-LABEL: @vtbl1_all_oob(
+; CHECK-NEXT: ret <8 x i8> zeroinitializer
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> <i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99>)
+ ret <8 x i8> %tbl
+}
+
+; vtbl3 referencing all 3 operands should NOT optimize.
+define <8 x i8> @vtbl3_three_sources_bail(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: @vtbl3_three_sources_bail(
+; CHECK-NEXT: [[TBL:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]], <8 x i8> [[C:%.*]], <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 16, i8 17, i8 0, i8 0>)
+; CHECK-NEXT: ret <8 x i8> [[TBL]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 16, i8 17, i8 0, i8 0>)
+ ret <8 x i8> %tbl
+}
+
+; vtbl4 referencing 3 unique operands should NOT optimize.
+define <8 x i8> @vtbl4_three_sources_bail(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: @vtbl4_three_sources_bail(
+; CHECK-NEXT: [[TBL:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]], <8 x i8> [[C:%.*]], <8 x i8> [[A]], <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 16, i8 17, i8 24, i8 25>)
+; CHECK-NEXT: ret <8 x i8> [[TBL]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %a, <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 16, i8 17, i8 24, i8 25>)
+ ret <8 x i8> %tbl
+}
+
+; vtbl4 referencing all 4 unique operands should NOT optimize.
+define <8 x i8> @vtbl4_four_sources_bail(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
+; CHECK-LABEL: @vtbl4_four_sources_bail(
+; CHECK-NEXT: [[TBL:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]], <8 x i8> [[C:%.*]], <8 x i8> [[D:%.*]], <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 16, i8 17, i8 24, i8 25>)
+; CHECK-NEXT: ret <8 x i8> [[TBL]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 16, i8 17, i8 24, i8 25>)
+ ret <8 x i8> %tbl
+}
+
+; vtbx1 with no OOB should optimize.
+define <8 x i8> @vtbx1_no_oob(<8 x i8> %fallback, <8 x i8> %a) {
+; CHECK-LABEL: @vtbx1_no_oob(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i8> [[TMP1]]
+;
+ %tbx = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %fallback, <8 x i8> %a, <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <8 x i8> %tbx
+}
+
+; vtbx2 where fallback == second source operand should optimize (deduplicated).
+define <8 x i8> @vtbx2_fallback_equals_second_source(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: @vtbx2_fallback_equals_second_source(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: ret <8 x i8> [[TMP1]]
+;
+ %tbx = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %b, <8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 99, i8 99, i8 99, i8 99>)
+ ret <8 x i8> %tbx
+}
+
+; vtbx1 with OOB where fallback == source should optimize (deduplicated).
+define <8 x i8> @vtbx1_oob_fallback_same_as_source(<8 x i8> %a) {
+; CHECK-LABEL: @vtbx1_oob_fallback_same_as_source(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x i8> [[TMP1]]
+;
+ %tbx = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %a, <8 x i8> <i8 3, i8 2, i8 1, i8 0, i8 99, i8 99, i8 99, i8 99>)
+ ret <8 x i8> %tbx
+}
+
+; vtbx2 with OOB should NOT optimize (2 sources + fallback = 3 sources).
+define <8 x i8> @vtbx2_with_oob_bail(<8 x i8> %fallback, <8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: @vtbx2_with_oob_bail(
+; CHECK-NEXT: [[TBX:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> [[FALLBACK:%.*]], <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]], <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 99, i8 99, i8 99, i8 99>)
+; CHECK-NEXT: ret <8 x i8> [[TBX]]
+;
+ %tbx = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %fallback, <8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 0, i8 1, i8 8, i8 9, i8 99, i8 99, i8 99, i8 99>)
+ ret <8 x i8> %tbx
+}
+
+; vtbx1 with all OOB indices should optimize to fallback.
+define <8 x i8> @vtbx1_all_oob(<8 x i8> %fallback, <8 x i8> %a) {
+; CHECK-LABEL: @vtbx1_all_oob(
+; CHECK-NEXT: ret <8 x i8> [[FALLBACK:%.*]]
+;
+ %tbx = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %fallback, <8 x i8> %a, <8 x i8> <i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99, i8 99>)
+ ret <8 x i8> %tbx
+}
+
+; Non-constant mask should NOT optimize.
+define <8 x i8> @vtbl1_non_constant_mask(<8 x i8> %a, <8 x i8> %mask) {
+; CHECK-LABEL: @vtbl1_non_constant_mask(
+; CHECK-NEXT: [[TBL:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> [[A:%.*]], <8 x i8> [[MASK:%.*]])
+; CHECK-NEXT: ret <8 x i8> [[TBL]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %mask)
+ ret <8 x i8> %tbl
+}
+
+; Mask with some poison elements should optimize, with poison propagating to output.
+define <8 x i8> @vtbl1_poison_mask_elements(<8 x i8> %a) {
+; CHECK-LABEL: @vtbl1_poison_mask_elements(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <8 x i32> <i32 0, i32 poison, i32 2, i32 poison, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x i8> [[TMP1]]
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> <i8 0, i8 poison, i8 2, i8 poison, i8 4, i8 5, i8 6, i8 7>)
+ ret <8 x i8> %tbl
+}
+
+; Mask with all poison elements should optimize to poison.
+define <8 x i8> @vtbl1_all_poison_mask(<8 x i8> %a) {
+; CHECK-LABEL: @vtbl1_all_poison_mask(
+; CHECK-NEXT: ret <8 x i8> poison
+;
+ %tbl = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> poison)
+ ret <8 x i8> %tbl
+}
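The tbx variants differ in one respect, sketched here with hypothetical %t, %shuf, %fb, and %a: an out-of-range index keeps the corresponding byte of the fallback operand instead of producing zero, so whenever any index is out of range the fallback itself becomes a shuffle source. On ARM the table registers are 64-bit, so vtbx1 indexes a single 8-byte table:

  %t = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %fb, <8 x i8> %a, <8 x i8> <i8 3, i8 2, i8 1, i8 0, i8 99, i8 99, i8 99, i8 99>)
  ; lanes 4..7 are out of range and read %fb[4..7], so this is equivalent to:
  %shuf = shufflevector <8 x i8> %a, <8 x i8> %fb, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 12, i32 13, i32 14, i32 15>

This accounting is why @vtbx2_with_oob_bail above must not be transformed: two table operands plus the fallback already add up to three sources.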
diff --git a/llvm/test/Transforms/InstCombine/ARM/tbl1.ll b/llvm/test/Transforms/InstCombine/ARM/tbl1.ll
deleted file mode 100644
index fbec1a2..0000000
--- a/llvm/test/Transforms/InstCombine/ARM/tbl1.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S | FileCheck %s
-
-target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
-target triple = "armv8-arm-none-eabi"
-
-; Turning a table lookup intrinsic into a shuffle vector instruction
-; can be beneficial. If the mask used for the lookup is the constant
-; vector {7,6,5,4,3,2,1,0}, then the back-end generates rev64
-; instructions instead.
-
-define <8 x i8> @tbl1_8x8(<8 x i8> %vec) {
-; CHECK-LABEL: @tbl1_8x8(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC:%.*]], <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: ret <8 x i8> [[TMP0]]
-;
-entry:
- %vtbl1 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %vec, <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
- ret <8 x i8> %vtbl1
-}
-
-; Bail the optimization if a mask index is out of range.
-define <8 x i8> @tbl1_8x8_out_of_range(<8 x i8> %vec) {
-; CHECK-LABEL: @tbl1_8x8_out_of_range(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[VTBL1:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> [[VEC:%.*]], <8 x i8> <i8 8, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; CHECK-NEXT: ret <8 x i8> [[VTBL1]]
-;
-entry:
- %vtbl1 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %vec, <8 x i8> <i8 8, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
- ret <8 x i8> %vtbl1
-}
-
-declare <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8>, <8 x i8>)
diff --git a/llvm/test/Transforms/InstCombine/binop-phi-operands.ll b/llvm/test/Transforms/InstCombine/binop-phi-operands.ll
index 9e04983..f0d4ad7 100644
--- a/llvm/test/Transforms/InstCombine/binop-phi-operands.ll
+++ b/llvm/test/Transforms/InstCombine/binop-phi-operands.ll
@@ -653,12 +653,11 @@ define i8 @mul_const_incoming0_speculatable(i1 %b, i8 %x, i8 %y) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
; CHECK: if:
+; CHECK-NEXT: [[TMP0:%.*]] = mul i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: br label [[THEN]]
; CHECK: then:
-; CHECK-NEXT: [[P0:%.*]] = phi i8 [ 42, [[ENTRY:%.*]] ], [ [[X:%.*]], [[IF]] ]
-; CHECK-NEXT: [[P1:%.*]] = phi i8 [ 17, [[ENTRY]] ], [ [[Y:%.*]], [[IF]] ]
+; CHECK-NEXT: [[R:%.*]] = phi i8 [ -54, [[ENTRY:%.*]] ], [ [[TMP0]], [[IF]] ]
; CHECK-NEXT: call void @sideeffect()
-; CHECK-NEXT: [[R:%.*]] = mul i8 [[P0]], [[P1]]
; CHECK-NEXT: ret i8 [[R]]
;
entry:
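The change above folds the constant arm of the phi pair through the speculatable mul: on the entry edge the operands are the constants 42 and 17, and 42 * 17 = 714, which wraps to -54 in i8. The two operand phis and the trailing mul therefore collapse into one mul hoisted into the if block, feeding a single phi of [ -54, entry ] and [ %tmp0, if ].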
diff --git a/llvm/test/Transforms/InstCombine/binop-select.ll b/llvm/test/Transforms/InstCombine/binop-select.ll
index 25f624e..9e336ad 100644
--- a/llvm/test/Transforms/InstCombine/binop-select.ll
+++ b/llvm/test/Transforms/InstCombine/binop-select.ll
@@ -335,7 +335,7 @@ define i32 @sub_sel_op1_use(i1 %b) {
define float @fadd_sel_op0(i1 %b, float %x) {
; CHECK-LABEL: @fadd_sel_op0(
-; CHECK-NEXT: [[R:%.*]] = select nnan i1 [[B:%.*]], float 0xFFF0000000000000, float 0x7FF0000000000000
+; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], float 0xFFF0000000000000, float 0x7FF0000000000000
; CHECK-NEXT: ret float [[R]]
;
%s = select i1 %b, float 0xFFF0000000000000, float 0x7FF0000000000000
@@ -403,3 +403,171 @@ define i32 @ashr_sel_op1_use(i1 %b) {
%r = ashr i32 -2, %s
ret i32 %r
}
+
+define i8 @commonArgWithOr0(i1 %arg0) {
+; CHECK-LABEL: @commonArgWithOr0(
+; CHECK-NEXT: [[V3:%.*]] = select i1 [[ARG0:%.*]], i8 17, i8 24
+; CHECK-NEXT: ret i8 [[V3]]
+;
+ %v0 = zext i1 %arg0 to i8
+ %v1 = select i1 %arg0, i8 0, i8 8
+ %v2 = or i8 %v1, %v0
+ %v3 = or i8 %v2, 16
+ ret i8 %v3
+}
+
+define i8 @commonArgWithOr1(i1 %arg0) {
+; CHECK-LABEL: @commonArgWithOr1(
+; CHECK-NEXT: [[V3:%.*]] = select i1 [[ARG0:%.*]], i8 17, i8 23
+; CHECK-NEXT: ret i8 [[V3]]
+;
+ %v0 = zext i1 %arg0 to i8
+ %v1 = select i1 %arg0, i8 1, i8 7
+ %v2 = or i8 %v1, %v0
+ %v3 = or i8 %v2, 16
+ ret i8 %v3
+}
+
+define i8 @commonArgWithOr2(i1 %arg0) {
+; CHECK-LABEL: @commonArgWithOr2(
+; CHECK-NEXT: [[V3:%.*]] = select i1 [[ARG0:%.*]], i8 21, i8 58
+; CHECK-NEXT: ret i8 [[V3]]
+;
+ %v0 = zext i1 %arg0 to i8
+ %v1 = select i1 %arg0, i8 21, i8 42
+ %v2 = or i8 %v1, %v0
+ %v3 = or i8 %v2, 16
+ ret i8 %v3
+}
+
+define i8 @commonArgWithAnd0(i1 %arg0) {
+; CHECK-LABEL: @commonArgWithAnd0(
+; CHECK-NEXT: ret i8 16
+;
+ %v0 = zext i1 %arg0 to i8
+ %v1 = select i1 %arg0, i8 0, i8 8
+ %v2 = and i8 %v1, %v0
+ %v3 = or i8 %v2, 16
+ ret i8 %v3
+}
+
+define i8 @commonArgWithAnd1(i1 %arg0) {
+; CHECK-LABEL: @commonArgWithAnd1(
+; CHECK-NEXT: ret i8 16
+;
+ %v0 = zext i1 %arg0 to i8
+ %v1 = select i1 %arg0, i8 8, i8 1
+ %v2 = and i8 %v1, %v0
+ %v3 = or i8 %v2, 16
+ ret i8 %v3
+}
+
+define i8 @commonArgWithAnd2(i1 %arg0) {
+; CHECK-LABEL: @commonArgWithAnd2(
+; CHECK-NEXT: [[V2:%.*]] = zext i1 [[ARG0:%.*]] to i8
+; CHECK-NEXT: [[V3:%.*]] = or disjoint i8 [[V2]], 16
+; CHECK-NEXT: ret i8 [[V3]]
+;
+ %v0 = zext i1 %arg0 to i8
+ %v1 = select i1 %arg0, i8 1, i8 7
+ %v2 = and i8 %v1, %v0
+ %v3 = or i8 %v2, 16
+ ret i8 %v3
+}
+
+define i8 @commonArgWithAnd3(i1 %arg0) {
+; CHECK-LABEL: @commonArgWithAnd3(
+; CHECK-NEXT: [[V2:%.*]] = zext i1 [[ARG0:%.*]] to i8
+; CHECK-NEXT: [[V3:%.*]] = or disjoint i8 [[V2]], 16
+; CHECK-NEXT: ret i8 [[V3]]
+;
+ %v0 = zext i1 %arg0 to i8
+ %v1 = select i1 %arg0, i8 21, i8 42
+ %v2 = and i8 %v1, %v0
+ %v3 = or i8 %v2, 16
+ ret i8 %v3
+}
+
+define i8 @commonArgWithXor0(i1 %arg0) {
+; CHECK-LABEL: @commonArgWithXor0(
+; CHECK-NEXT: [[V3:%.*]] = select i1 [[ARG0:%.*]], i8 17, i8 24
+; CHECK-NEXT: ret i8 [[V3]]
+;
+ %v0 = zext i1 %arg0 to i8
+ %v1 = select i1 %arg0, i8 0, i8 8
+ %v2 = xor i8 %v1, %v0
+ %v3 = or i8 %v2, 16
+ ret i8 %v3
+}
+
+define i8 @commonArgWithXor1(i1 %arg0) {
+; CHECK-LABEL: @commonArgWithXor1(
+; CHECK-NEXT: [[V2:%.*]] = select i1 [[ARG0:%.*]], i8 8, i8 1
+; CHECK-NEXT: ret i8 [[V2]]
+;
+ %v0 = zext i1 %arg0 to i8
+ %v1 = select i1 %arg0, i8 9, i8 1
+ %v2 = xor i8 %v1, %v0
+ ret i8 %v2
+}
+
+define i8 @commonArgWithXor2(i1 %arg0) {
+; CHECK-LABEL: @commonArgWithXor2(
+; CHECK-NEXT: [[V3:%.*]] = select i1 [[ARG0:%.*]], i8 16, i8 23
+; CHECK-NEXT: ret i8 [[V3]]
+;
+ %v0 = zext i1 %arg0 to i8
+ %v1 = select i1 %arg0, i8 1, i8 7
+ %v2 = xor i8 %v1, %v0
+ %v3 = or i8 %v2, 16
+ ret i8 %v3
+}
+
+define i8 @commonArgWithXor3(i1 %arg0) {
+; CHECK-LABEL: @commonArgWithXor3(
+; CHECK-NEXT: [[V3:%.*]] = select i1 [[ARG0:%.*]], i8 20, i8 61
+; CHECK-NEXT: ret i8 [[V3]]
+;
+ %v0 = zext i1 %arg0 to i8
+ %v1 = select i1 %arg0, i8 21, i8 45
+ %v2 = xor i8 %v1, %v0
+ %v3 = or i8 %v2, 16
+ ret i8 %v3
+}
+
+define i8 @commonArgWithAdd0(i1 %arg0) {
+; CHECK-LABEL: @commonArgWithAdd0(
+; CHECK-NEXT: [[V3:%.*]] = select i1 [[ARG0:%.*]], i8 22, i8 61
+; CHECK-NEXT: ret i8 [[V3]]
+;
+ %v0 = zext i1 %arg0 to i8
+ %v1 = select i1 %arg0, i8 21, i8 45
+ %v2 = add i8 %v1, %v0
+ %v3 = or i8 %v2, 16
+ ret i8 %v3
+}
+
+define i32 @OrSelectIcmpZero(i32 %a, i32 %b) {
+; CHECK-LABEL: @OrSelectIcmpZero(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[CMP]], i32 [[B:%.*]], i32 [[A]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %cmp = icmp eq i32 %a, 0
+ %sel = select i1 %cmp, i32 %b, i32 0
+ %or = or i32 %sel, %a
+ ret i32 %or
+}
+
+define i32 @OrSelectIcmpNonZero(i32 %a, i32 %b) {
+; CHECK-LABEL: @OrSelectIcmpNonZero(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[B:%.*]], i32 42
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL]], [[A]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %cmp = icmp eq i32 %a, 0
+ %sel = select i1 %cmp, i32 %b, i32 42
+ %or = or i32 %sel, %a
+ ret i32 %or
+}
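A worked instance of the new commonArg* folds, using @commonArgWithOr0: when %arg0 is true, %v0 = 1 and %v1 = 0, so %v3 = (0 | 1) | 16 = 17; when it is false, %v0 = 0 and %v1 = 8, so %v3 = (8 | 0) | 16 = 24. Both arms reduce to constants, so the whole chain becomes a single select between 17 and 24. @OrSelectIcmpZero folds for a different reason: when %a == 0, or-ing with %a is a no-op and the result is %b, and when %a != 0 the select yields 0 so the or returns %a; a select of %b and %a captures both cases directly.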
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
index b3093a9..f0e40f4e 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
@@ -123,8 +123,7 @@ define i8 @udiv_slt_exact(i8 %x) {
define i8 @canonicalize_icmp_operands(i8 %x) {
; CHECK-LABEL: define i8 @canonicalize_icmp_operands(
; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 119)
-; CHECK-NEXT: [[S:%.*]] = add nsw i8 [[TMP1]], 8
+; CHECK-NEXT: [[S:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X]], i8 8)
; CHECK-NEXT: ret i8 [[S]]
;
%add = add nsw i8 %x, 8
diff --git a/llvm/test/Transforms/InstCombine/cast-mul-select.ll b/llvm/test/Transforms/InstCombine/cast-mul-select.ll
index 8aa768c..29c5bb5 100644
--- a/llvm/test/Transforms/InstCombine/cast-mul-select.ll
+++ b/llvm/test/Transforms/InstCombine/cast-mul-select.ll
@@ -91,22 +91,18 @@ define i8 @select2(i1 %cond, i8 %x, i8 %y, i8 %z) {
define i32 @eval_trunc_multi_use_in_one_inst(i32 %x) {
; CHECK-LABEL: @eval_trunc_multi_use_in_one_inst(
-; CHECK-NEXT: [[Z:%.*]] = zext i32 [[X:%.*]] to i64
-; CHECK-NEXT: [[A:%.*]] = add nuw nsw i64 [[Z]], 15
-; CHECK-NEXT: [[M:%.*]] = mul i64 [[A]], [[A]]
-; CHECK-NEXT: [[T:%.*]] = trunc i64 [[M]] to i32
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 15
+; CHECK-NEXT: [[T:%.*]] = mul i32 [[A]], [[A]]
; CHECK-NEXT: ret i32 [[T]]
;
; DBGINFO-LABEL: @eval_trunc_multi_use_in_one_inst(
-; DBGINFO-NEXT: [[Z:%.*]] = zext i32 [[X:%.*]] to i64, !dbg [[DBG57:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i64 [[Z]], [[META52:![0-9]+]], !DIExpression(), [[DBG57]])
-; DBGINFO-NEXT: [[A:%.*]] = add nuw nsw i64 [[Z]], 15, !dbg [[DBG58:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i64 [[A]], [[META54:![0-9]+]], !DIExpression(), [[DBG58]])
-; DBGINFO-NEXT: [[M:%.*]] = mul i64 [[A]], [[A]], !dbg [[DBG59:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i64 [[M]], [[META55:![0-9]+]], !DIExpression(), [[DBG59]])
-; DBGINFO-NEXT: [[T:%.*]] = trunc i64 [[M]] to i32, !dbg [[DBG60:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i32 [[T]], [[META56:![0-9]+]], !DIExpression(), [[DBG60]])
-; DBGINFO-NEXT: ret i32 [[T]], !dbg [[DBG61:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(i32 [[X:%.*]], [[META52:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_stack_value), [[META57:![0-9]+]])
+; DBGINFO-NEXT: [[A:%.*]] = add i32 [[X]], 15, !dbg [[DBG58:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(i32 [[X]], [[META54:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_plus_uconst, 15, DW_OP_stack_value), [[DBG58]])
+; DBGINFO-NEXT: [[M:%.*]] = mul i32 [[A]], [[A]], !dbg [[DBG59:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(!DIArgList(i32 [[X]], i32 [[X]]), [[META55:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_plus_uconst, 15, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_plus_uconst, 15, DW_OP_mul, DW_OP_stack_value), [[DBG59]])
+; DBGINFO-NEXT: #dbg_value(i32 [[M]], [[META56:![0-9]+]], !DIExpression(), [[META60:![0-9]+]])
+; DBGINFO-NEXT: ret i32 [[M]], !dbg [[DBG61:![0-9]+]]
;
%z = zext i32 %x to i64
%a = add nsw nuw i64 %z, 15
@@ -143,25 +139,21 @@ define i32 @eval_zext_multi_use_in_one_inst(i32 %x) {
define i32 @eval_sext_multi_use_in_one_inst(i32 %x) {
; CHECK-LABEL: @eval_sext_multi_use_in_one_inst(
-; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT: [[A:%.*]] = and i16 [[T]], 14
-; CHECK-NEXT: [[M:%.*]] = mul nuw nsw i16 [[A]], [[A]]
-; CHECK-NEXT: [[O:%.*]] = or disjoint i16 [[M]], -32768
-; CHECK-NEXT: [[R:%.*]] = sext i16 [[O]] to i32
+; CHECK-NEXT: [[A:%.*]] = and i32 [[X:%.*]], 14
+; CHECK-NEXT: [[M:%.*]] = mul nuw nsw i32 [[A]], [[A]]
+; CHECK-NEXT: [[R:%.*]] = or disjoint i32 [[M]], -32768
; CHECK-NEXT: ret i32 [[R]]
;
; DBGINFO-LABEL: @eval_sext_multi_use_in_one_inst(
-; DBGINFO-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i16, !dbg [[DBG81:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i16 [[T]], [[META76:![0-9]+]], !DIExpression(), [[DBG81]])
-; DBGINFO-NEXT: [[A:%.*]] = and i16 [[T]], 14, !dbg [[DBG82:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i16 [[A]], [[META77:![0-9]+]], !DIExpression(), [[DBG82]])
-; DBGINFO-NEXT: [[M:%.*]] = mul nuw nsw i16 [[A]], [[A]], !dbg [[DBG83:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i16 [[M]], [[META78:![0-9]+]], !DIExpression(), [[DBG83]])
-; DBGINFO-NEXT: [[O:%.*]] = or disjoint i16 [[M]], -32768, !dbg [[DBG84:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i16 [[O]], [[META79:![0-9]+]], !DIExpression(), [[DBG84]])
-; DBGINFO-NEXT: [[R:%.*]] = sext i16 [[O]] to i32, !dbg [[DBG85:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i32 [[R]], [[META80:![0-9]+]], !DIExpression(), [[DBG85]])
-; DBGINFO-NEXT: ret i32 [[R]], !dbg [[DBG86:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(i32 [[X:%.*]], [[META76:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_stack_value), [[META81:![0-9]+]])
+; DBGINFO-NEXT: [[A:%.*]] = and i32 [[X]], 14, !dbg [[DBG82:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(i32 [[X]], [[META77:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_constu, 14, DW_OP_and, DW_OP_stack_value), [[DBG82]])
+; DBGINFO-NEXT: [[M:%.*]] = mul nuw nsw i32 [[A]], [[A]], !dbg [[DBG83:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(!DIArgList(i32 [[X]], i32 [[X]]), [[META78:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_constu, 14, DW_OP_and, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_constu, 14, DW_OP_and, DW_OP_mul, DW_OP_stack_value), [[DBG83]])
+; DBGINFO-NEXT: [[O:%.*]] = or disjoint i32 [[M]], -32768, !dbg [[DBG84:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(!DIArgList(i32 [[X]], i32 [[X]]), [[META79:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_constu, 14, DW_OP_and, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_constu, 14, DW_OP_and, DW_OP_mul, DW_OP_constu, 18446744073709518848, DW_OP_or, DW_OP_stack_value), [[DBG84]])
+; DBGINFO-NEXT: #dbg_value(i32 [[O]], [[META80:![0-9]+]], !DIExpression(), [[META85:![0-9]+]])
+; DBGINFO-NEXT: ret i32 [[O]], !dbg [[DBG86:![0-9]+]]
;
%t = trunc i32 %x to i16
%a = and i16 %t, 14
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
index cc0f2e79..46deb29 100644
--- a/llvm/test/Transforms/InstCombine/cast.ll
+++ b/llvm/test/Transforms/InstCombine/cast.ll
@@ -2238,3 +2238,147 @@ define i32 @test95(i32 %x) {
%5 = zext i8 %4 to i32
ret i32 %5
}
+
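+; The trunc only demands the low 16 bits, so the whole expression is evaluated
+; in i16 and the zexts disappear.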
+define i16 @test96(i16 %x, i16 %y) {
+; ALL-LABEL: @test96(
+; ALL-NEXT: [[A:%.*]] = add i16 [[X:%.*]], [[Y:%.*]]
+; ALL-NEXT: [[B:%.*]] = add i16 [[A]], 5
+; ALL-NEXT: [[C:%.*]] = mul i16 [[A]], 3
+; ALL-NEXT: [[T:%.*]] = or i16 [[B]], [[C]]
+; ALL-NEXT: ret i16 [[T]]
+;
+ %zx = zext i16 %x to i32
+ %zy = zext i16 %y to i32
+ %a = add i32 %zx, %zy
+ %b = add i32 %a, 5
+ %c = mul i32 %a, 3
+ %d = or i32 %b, %c
+ %t = trunc i32 %d to i16
+ ret i16 %t
+}
+
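+; %a has an additional i32 use, so narrowing the expression is not profitable
+; and the wide operations stay.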
+define i16 @test97(i16 %x, i16 %y) {
+; ALL-LABEL: @test97(
+; ALL-NEXT: [[ZX:%.*]] = zext i16 [[X:%.*]] to i32
+; ALL-NEXT: [[ZY:%.*]] = zext i16 [[Y:%.*]] to i32
+; ALL-NEXT: [[A:%.*]] = add nuw nsw i32 [[ZX]], [[ZY]]
+; ALL-NEXT: [[B:%.*]] = add nuw nsw i32 [[A]], 5
+; ALL-NEXT: [[C:%.*]] = mul nuw nsw i32 [[A]], 3
+; ALL-NEXT: [[D:%.*]] = or i32 [[B]], [[C]]
+; ALL-NEXT: call void @use_i32(i32 [[A]])
+; ALL-NEXT: [[T:%.*]] = trunc i32 [[D]] to i16
+; ALL-NEXT: ret i16 [[T]]
+;
+ %zx = zext i16 %x to i32
+ %zy = zext i16 %y to i32
+ %a = add i32 %zx, %zy
+ %b = add i32 %a, 5
+ %c = mul i32 %a, 3
+ %d = or i32 %b, %c
+ call void @use_i32(i32 %a)
+ %t = trunc i32 %d to i16
+ ret i16 %t
+}
+
+; Expected not to narrow the operations to i16: the use chain cycles through a loop phi.
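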
+define i16 @test98(i16 %x, i16 %n) {
+; ALL-LABEL: @test98(
+; ALL-NEXT: entry:
+; ALL-NEXT: [[Z:%.*]] = zext i16 [[X:%.*]] to i32
+; ALL-NEXT: br label [[LOOP:%.*]]
+; ALL: loop:
+; ALL-NEXT: [[P:%.*]] = phi i32 [ [[Z]], [[ENTRY:%.*]] ], [ [[A:%.*]], [[LOOP]] ]
+; ALL-NEXT: [[A]] = add i32 [[P]], 1
+; ALL-NEXT: [[T:%.*]] = trunc i32 [[A]] to i16
+; ALL-NEXT: [[COND:%.*]] = icmp ugt i16 [[N:%.*]], [[T]]
+; ALL-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; ALL: exit:
+; ALL-NEXT: ret i16 [[T]]
+;
+entry:
+ %z = zext i16 %x to i32
+ br label %loop
+
+loop:
+ %p = phi i32 [ %z, %entry ], [ %a, %loop ]
+ %a = add i32 %p, 1
+ %t = trunc i32 %a to i16
+ %cond = icmp ult i16 %t, %n
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret i16 %t
+}
+
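+; The sext counterpart of test96: the i16 expression is evaluated in i32,
+; leaving only a shl/ashr pair to sign-extend the result from bit 15.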
+define i32 @test99(i32 %x, i32 %y) {
+; ALL-LABEL: @test99(
+; ALL-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; ALL-NEXT: [[B:%.*]] = add i32 [[A]], 5
+; ALL-NEXT: [[C:%.*]] = mul i32 [[A]], 3
+; ALL-NEXT: [[D:%.*]] = or i32 [[B]], [[C]]
+; ALL-NEXT: [[SEXT:%.*]] = shl i32 [[D]], 16
+; ALL-NEXT: [[S:%.*]] = ashr exact i32 [[SEXT]], 16
+; ALL-NEXT: ret i32 [[S]]
+;
+ %tx = trunc i32 %x to i16
+ %ty = trunc i32 %y to i16
+ %a = add i16 %tx, %ty
+ %b = add i16 %a, 5
+ %c = mul i16 %a, 3
+ %d = or i16 %b, %c
+ %t = sext i16 %d to i32
+ ret i32 %t
+}
+
+define i32 @test100(i32 %x, i32 %y) {
+; ALL-LABEL: @test100(
+; ALL-NEXT: [[TX:%.*]] = trunc i32 [[X:%.*]] to i8
+; ALL-NEXT: [[TY:%.*]] = trunc i32 [[Y:%.*]] to i8
+; ALL-NEXT: [[A:%.*]] = add i8 [[TX]], [[TY]]
+; ALL-NEXT: [[B:%.*]] = add i8 [[A]], 5
+; ALL-NEXT: [[C:%.*]] = mul i8 [[A]], 3
+; ALL-NEXT: [[D:%.*]] = or i8 [[B]], [[C]]
+; ALL-NEXT: call void @use_i8(i8 [[A]])
+; ALL-NEXT: [[T:%.*]] = sext i8 [[D]] to i32
+; ALL-NEXT: ret i32 [[T]]
+;
+ %tx = trunc i32 %x to i8
+ %ty = trunc i32 %y to i8
+ %a = add i8 %tx, %ty
+ %b = add i8 %a, 5
+ %c = mul i8 %a, 3
+ %d = or i8 %b, %c
+ call void @use_i8(i8 %a)
+ %t = sext i8 %d to i32
+ ret i32 %t
+}
+
+; Expected not to extend the operations to i32: the use chain cycles through a loop phi.
+define i32 @test101(i32 %x, i8 %n) {
+; ALL-LABEL: @test101(
+; ALL-NEXT: entry:
+; ALL-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8
+; ALL-NEXT: br label [[LOOP:%.*]]
+; ALL: loop:
+; ALL-NEXT: [[P:%.*]] = phi i8 [ [[T]], [[ENTRY:%.*]] ], [ [[A:%.*]], [[LOOP]] ]
+; ALL-NEXT: [[A]] = add i8 [[P]], 1
+; ALL-NEXT: [[COND:%.*]] = icmp ult i8 [[A]], [[N:%.*]]
+; ALL-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; ALL: exit:
+; ALL-NEXT: [[S:%.*]] = sext i8 [[A]] to i32
+; ALL-NEXT: ret i32 [[S]]
+;
+entry:
+ %t = trunc i32 %x to i8
+ br label %loop
+
+loop:
+ %p = phi i8 [ %t, %entry ], [ %a, %loop ]
+ %a = add i8 %p, 1
+ %cond = icmp ult i8 %a, %n
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ %s = sext i8 %a to i32
+ ret i32 %s
+}
diff --git a/llvm/test/Transforms/InstCombine/catchswitch-phi.ll b/llvm/test/Transforms/InstCombine/catchswitch-phi.ll
index 720f525..562dded 100644
--- a/llvm/test/Transforms/InstCombine/catchswitch-phi.ll
+++ b/llvm/test/Transforms/InstCombine/catchswitch-phi.ll
@@ -94,18 +94,17 @@ define void @test1() personality ptr @__gxx_wasm_personality_v0 {
; CHECK-NEXT: [[CALL:%.*]] = invoke i32 @baz()
; CHECK-NEXT: to label [[INVOKE_CONT1:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
; CHECK: invoke.cont1:
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK-NEXT: [[TOBOOL_INIT:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_INIT]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
-; CHECK-NEXT: [[AP_0:%.*]] = phi i8 [ 1, [[IF_THEN]] ], [ 0, [[INVOKE_CONT1]] ]
; CHECK-NEXT: invoke void @foo()
; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[CATCH_DISPATCH]]
; CHECK: invoke.cont2:
; CHECK-NEXT: br label [[TRY_CONT:%.*]]
; CHECK: catch.dispatch:
-; CHECK-NEXT: [[AP_1:%.*]] = phi i8 [ [[AP_0]], [[IF_END]] ], [ 0, [[INVOKE_CONT]] ]
+; CHECK-NEXT: [[AP_1:%.*]] = phi i1 [ [[TOBOOL_INIT]], [[IF_END]] ], [ false, [[INVOKE_CONT]] ]
; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.start] unwind label [[CATCH_DISPATCH1]]
; CHECK: catch.start:
; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null]
@@ -116,11 +115,10 @@ define void @test1() personality ptr @__gxx_wasm_personality_v0 {
; CHECK-NEXT: invoke void @llvm.wasm.rethrow() #[[ATTR0:[0-9]+]] [ "funclet"(token [[TMP1]]) ]
; CHECK-NEXT: to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH1]]
; CHECK: catch.dispatch1:
-; CHECK-NEXT: [[AP_2:%.*]] = phi i8 [ [[AP_1]], [[CATCH_DISPATCH]] ], [ [[AP_1]], [[RETHROW]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TOBOOL1:%.*]] = phi i1 [ [[AP_1]], [[CATCH_DISPATCH]] ], [ [[AP_1]], [[RETHROW]] ], [ false, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = catchswitch within none [label %catch.start1] unwind to caller
; CHECK: catch.start1:
; CHECK-NEXT: [[TMP3:%.*]] = catchpad within [[TMP2]] [ptr null]
-; CHECK-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[AP_2]] to i1
; CHECK-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN1:%.*]], label [[IF_END1:%.*]]
; CHECK: if.then1:
; CHECK-NEXT: br label [[IF_END1]]
diff --git a/llvm/test/Transforms/InstCombine/debuginfo-dce.ll b/llvm/test/Transforms/InstCombine/debuginfo-dce.ll
index c1d7c30..ec90779 100644
--- a/llvm/test/Transforms/InstCombine/debuginfo-dce.ll
+++ b/llvm/test/Transforms/InstCombine/debuginfo-dce.ll
@@ -125,15 +125,15 @@ attributes #1 = { nounwind readnone }
!19 = !DILocation(line: 6, column: 17, scope: !14)
!20 = !DIExpression(DW_OP_plus_uconst, 0)
!21 = !DILocation(line: 11, column: 1, scope: !14)
-!22 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17)
+!22 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
!23 = !DILocation(line: 6, column: 17, scope: !22)
!24 = !DILocalVariable(name: "entry", scope: !22, file: !1, line: 6, type: !4)
-!25 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17)
+!25 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
!26 = !DILocation(line: 6, column: 17, scope: !25)
!27 = !DILocalVariable(name: "entry", scope: !25, file: !1, line: 6, type: !4)
-!28 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17)
+!28 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
!29 = !DILocation(line: 6, column: 17, scope: !28)
!30 = !DILocalVariable(name: "entry", scope: !28, file: !1, line: 6, type: !4)
-!31 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17)
+!31 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
!32 = !DILocation(line: 6, column: 17, scope: !31)
!33 = !DILocalVariable(name: "entry", scope: !31, file: !1, line: 6, type: !4)
diff --git a/llvm/test/Transforms/InstCombine/dont-distribute-phi.ll b/llvm/test/Transforms/InstCombine/dont-distribute-phi.ll
index 45e47d8..5e90d4b 100644
--- a/llvm/test/Transforms/InstCombine/dont-distribute-phi.ll
+++ b/llvm/test/Transforms/InstCombine/dont-distribute-phi.ll
@@ -7,7 +7,7 @@
define zeroext i1 @foo(i32 %arg) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[ARG:%.*]], 37
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[ARG:%.*]], 37
; CHECK-NEXT: br i1 [[CMP1]], label [[BB_ELSE:%.*]], label [[BB_THEN:%.*]]
; CHECK: bb_then:
; CHECK-NEXT: call void @bar()
@@ -16,8 +16,7 @@ define zeroext i1 @foo(i32 %arg) {
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[ARG]], 17
; CHECK-NEXT: br label [[BB_EXIT]]
; CHECK: bb_exit:
-; CHECK-NEXT: [[PHI1:%.*]] = phi i1 [ [[CMP2]], [[BB_ELSE]] ], [ undef, [[BB_THEN]] ]
-; CHECK-NEXT: [[AND1:%.*]] = and i1 [[PHI1]], [[CMP1]]
+; CHECK-NEXT: [[AND1:%.*]] = phi i1 [ [[CMP2]], [[BB_THEN]] ], [ false, [[BB_ELSE]] ]
; CHECK-NEXT: ret i1 [[AND1]]
;
@@ -43,7 +42,7 @@ bb_exit:
define zeroext i1 @foo_logical(i32 %arg) {
; CHECK-LABEL: @foo_logical(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[ARG:%.*]], 37
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[ARG:%.*]], 37
; CHECK-NEXT: br i1 [[CMP1]], label [[BB_ELSE:%.*]], label [[BB_THEN:%.*]]
; CHECK: bb_then:
; CHECK-NEXT: call void @bar()
@@ -52,8 +51,7 @@ define zeroext i1 @foo_logical(i32 %arg) {
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[ARG]], 17
; CHECK-NEXT: br label [[BB_EXIT]]
; CHECK: bb_exit:
-; CHECK-NEXT: [[PHI1:%.*]] = phi i1 [ [[CMP2]], [[BB_ELSE]] ], [ undef, [[BB_THEN]] ]
-; CHECK-NEXT: [[AND1:%.*]] = and i1 [[PHI1]], [[CMP1]]
+; CHECK-NEXT: [[AND1:%.*]] = phi i1 [ [[CMP2]], [[BB_THEN]] ], [ false, [[BB_ELSE]] ]
; CHECK-NEXT: ret i1 [[AND1]]
;
diff --git a/llvm/test/Transforms/InstCombine/float-shrink-compare.ll b/llvm/test/Transforms/InstCombine/float-shrink-compare.ll
index 77b6ed7..6383fef 100644
--- a/llvm/test/Transforms/InstCombine/float-shrink-compare.ll
+++ b/llvm/test/Transforms/InstCombine/float-shrink-compare.ll
@@ -463,6 +463,34 @@ define i1 @test18(float %x, float %y, float %z) {
ret i1 %5
}
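+; A double libcall on fpext'd floats shrinks to the equivalent float minimumnum
+; or maximumnum intrinsic.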
+define i1 @test_fminimum_num(float %x, float %y, float %z) {
+; CHECK-LABEL: @test_fminimum_num(
+; CHECK-NEXT: [[FMINIMUM_NUMF:%.*]] = call float @llvm.minimumnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = fcmp oeq float [[FMINIMUM_NUMF]], [[Z:%.*]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %1 = fpext float %x to double
+ %2 = fpext float %y to double
+ %3 = call double @fminimum_num(double %1, double %2) nounwind
+ %4 = fpext float %z to double
+ %5 = fcmp oeq double %3, %4
+ ret i1 %5
+}
+
+define i1 @test_fmaximum_num(float %x, float %y, float %z) {
+; CHECK-LABEL: @test_fmaximum_num(
+; CHECK-NEXT: [[FMAXIMUM_NUMF:%.*]] = call float @llvm.maximumnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = fcmp oeq float [[FMAXIMUM_NUMF]], [[Z:%.*]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %1 = fpext float %x to double
+ %2 = fpext float %y to double
+ %3 = call double @fmaximum_num(double %1, double %2) nounwind
+ %4 = fpext float %z to double
+ %5 = fcmp oeq double %3, %4
+ ret i1 %5
+}
+
define i1 @test19(float %x, float %y, float %z) {
; CHECK-LABEL: @test19(
; CHECK-NEXT: [[COPYSIGNF:%.*]] = call float @copysignf(float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]]
@@ -518,6 +546,8 @@ declare double @roundeven(double) nounwind readnone
declare double @trunc(double) nounwind readnone
declare double @fmin(double, double) nounwind readnone
declare double @fmax(double, double) nounwind readnone
+declare double @fminimum_num(double, double) nounwind readnone
+declare double @fmaximum_num(double, double) nounwind readnone
declare double @llvm.fabs.f64(double) nounwind readnone
declare double @llvm.ceil.f64(double) nounwind readnone
diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index cd4a8e3..3cbf709 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -1222,7 +1222,7 @@ define <2 x double> @negate_if_true_wrong_constant(<2 x double> %px, i1 %cond) {
; X *fast (C ? 1.0 : 0.0) -> C ? X : 0.0
define float @fmul_select(float %x, i1 %c) {
; CHECK-LABEL: @fmul_select(
-; CHECK-NEXT: [[MUL:%.*]] = select fast i1 [[C:%.*]], float [[X:%.*]], float 0.000000e+00
+; CHECK-NEXT: [[MUL:%.*]] = select i1 [[C:%.*]], float [[X:%.*]], float 0.000000e+00
; CHECK-NEXT: ret float [[MUL]]
;
%sel = select i1 %c, float 1.0, float 0.0
@@ -1233,7 +1233,7 @@ define float @fmul_select(float %x, i1 %c) {
; X *fast (C ? 1.0 : 0.0) -> C ? X : 0.0
define <2 x float> @fmul_select_vec(<2 x float> %x, i1 %c) {
; CHECK-LABEL: @fmul_select_vec(
-; CHECK-NEXT: [[MUL:%.*]] = select fast i1 [[C:%.*]], <2 x float> [[X:%.*]], <2 x float> zeroinitializer
+; CHECK-NEXT: [[MUL:%.*]] = select i1 [[C:%.*]], <2 x float> [[X:%.*]], <2 x float> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[MUL]]
;
%sel = select i1 %c, <2 x float> <float 1.0, float 1.0>, <2 x float> zeroinitializer
diff --git a/llvm/test/Transforms/InstCombine/free-inversion.ll b/llvm/test/Transforms/InstCombine/free-inversion.ll
index 4b69a5e77..2e8e75c 100644
--- a/llvm/test/Transforms/InstCombine/free-inversion.ll
+++ b/llvm/test/Transforms/InstCombine/free-inversion.ll
@@ -563,10 +563,10 @@ define i1 @test_inv_free(i1 %c1, i1 %c2, i1 %c3, i1 %c4) {
; CHECK: b2:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: b3:
+; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[C3:%.*]], [[C4:%.*]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
-; CHECK-NEXT: [[VAL_NOT:%.*]] = phi i1 [ false, [[B1]] ], [ true, [[B2]] ], [ [[C3:%.*]], [[B3]] ]
-; CHECK-NEXT: [[COND_NOT:%.*]] = and i1 [[VAL_NOT]], [[C4:%.*]]
+; CHECK-NEXT: [[COND_NOT:%.*]] = phi i1 [ false, [[B1]] ], [ [[C4]], [[B2]] ], [ [[TMP0]], [[B3]] ]
; CHECK-NEXT: br i1 [[COND_NOT]], label [[B5:%.*]], label [[B4:%.*]]
; CHECK: b4:
; CHECK-NEXT: ret i1 true
diff --git a/llvm/test/Transforms/InstCombine/get_vector_length.ll b/llvm/test/Transforms/InstCombine/get_vector_length.ll
new file mode 100644
index 0000000..122beea
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/get_vector_length.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine,verify -S | FileCheck %s
+
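+; get.vector.length(%cnt, vf, scalable) simplifies to %cnt whenever %cnt is
+; known to be at most the maximum element count, i.e. vf (times vscale when
+; scalable is true).
+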
+define i32 @cnt_known_lt() {
+; CHECK-LABEL: define i32 @cnt_known_lt() {
+; CHECK-NEXT: ret i32 1
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i32 1, i32 2, i1 false)
+ ret i32 %x
+}
+
+define i32 @cnt_not_known_lt() {
+; CHECK-LABEL: define i32 @cnt_not_known_lt() {
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 2, i32 1, i1 false)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i32 2, i32 1, i1 false)
+ ret i32 %x
+}
+
+define i32 @cnt_known_lt_scalable() vscale_range(2, 4) {
+; CHECK-LABEL: define i32 @cnt_known_lt_scalable(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret i32 2
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i32 2, i32 1, i1 true)
+ ret i32 %x
+}
+
+define i32 @cnt_not_known_lt_scalable() {
+; CHECK-LABEL: define i32 @cnt_not_known_lt_scalable() {
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 2, i32 1, i1 true)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i32 2, i32 1, i1 true)
+ ret i32 %x
+}
+
+define i32 @cnt_known_lt_runtime(i32 %x) {
+; CHECK-LABEL: define i32 @cnt_known_lt_runtime(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i32 [[X]], 4
+; CHECK-NEXT: call void @llvm.assume(i1 [[ICMP]])
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %icmp = icmp ule i32 %x, 3
+ call void @llvm.assume(i1 %icmp)
+ %y = call i32 @llvm.experimental.get.vector.length(i32 %x, i32 3, i1 false)
+ ret i32 %y
+}
+
+define i32 @cnt_known_lt_runtime_trunc(i64 %x) {
+; CHECK-LABEL: define i32 @cnt_known_lt_runtime_trunc(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[X]], 4
+; CHECK-NEXT: call void @llvm.assume(i1 [[ICMP]])
+; CHECK-NEXT: [[Y:%.*]] = trunc nuw nsw i64 [[X]] to i32
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %icmp = icmp ule i64 %x, 3
+ call void @llvm.assume(i1 %icmp)
+ %y = call i32 @llvm.experimental.get.vector.length(i64 %x, i32 3, i1 false)
+ ret i32 %y
+}
+
+; FIXME: We should be able to deduce the constant range from AssumptionCache
+; rather than relying on KnownBits, which in this case only knows x <= 3.
+define i32 @cnt_known_lt_runtime_assumption(i32 %x) {
+; CHECK-LABEL: define i32 @cnt_known_lt_runtime_assumption(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i32 [[X]], 3
+; CHECK-NEXT: call void @llvm.assume(i1 [[ICMP]])
+; CHECK-NEXT: [[Y:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[X]], i32 2, i1 false)
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %icmp = icmp ule i32 %x, 2
+ call void @llvm.assume(i1 %icmp)
+ %y = call i32 @llvm.experimental.get.vector.length(i32 %x, i32 2, i1 false)
+ ret i32 %y
+}
+
+define i32 @cnt_known_lt_i16() {
+; CHECK-LABEL: define i32 @cnt_known_lt_i16() {
+; CHECK-NEXT: ret i32 1
+;
+ %x = call i32 @llvm.experimental.get.vector.length(i16 1, i32 2, i1 false)
+ ret i32 %x
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll
index 8449c7c..2806fee 100644
--- a/llvm/test/Transforms/InstCombine/icmp-add.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-add.ll
@@ -3440,3 +3440,79 @@ define i1 @val_is_aligend_pred_mismatch(i32 %num) {
%_0 = icmp sge i32 %num.masked, %num
ret i1 %_0
}
+
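+; With nsw on the add and samesign on the compare, the unsigned predicate can
+; be rewritten as a signed compare of %arg0 against 7 + 18 == 25.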
+define i1 @icmp_samesign_with_nsw_add(i32 %arg0) {
+; CHECK-LABEL: @icmp_samesign_with_nsw_add(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V1:%.*]] = icmp sgt i32 [[ARG0:%.*]], 25
+; CHECK-NEXT: ret i1 [[V1]]
+;
+entry:
+ %v0 = add nsw i32 %arg0, -18
+ %v1 = icmp samesign ugt i32 %v0, 7
+ ret i1 %v1
+}
+
+; Negative test: the fold shouldn't fire since -124 - 12 overflows the signed i8 range
+define i1 @icmp_samesign_with_nsw_add_neg(i8 %arg0) {
+; CHECK-LABEL: @icmp_samesign_with_nsw_add_neg(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[ARG0:%.*]], -121
+; CHECK-NEXT: [[V1:%.*]] = icmp ult i8 [[TMP0]], 123
+; CHECK-NEXT: ret i1 [[V1]]
+;
+entry:
+ %v0 = add nsw i8 %arg0, 12
+ %v1 = icmp samesign ugt i8 %v0, -124
+ ret i1 %v1
+}
+
+define i1 @icmp_with_nuw_add(i32 %arg0) {
+; CHECK-LABEL: @icmp_with_nuw_add(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V1:%.*]] = icmp ult i32 [[ARG0:%.*]], 11
+; CHECK-NEXT: ret i1 [[V1]]
+;
+entry:
+ %v0 = add nuw i32 %arg0, 7
+ %v1 = icmp ult i32 %v0, 18
+ ret i1 %v1
+}
+
+define i1 @icmp_partial_negative_samesign_ult_to_slt(i8 range(i8 -1, 5) %x) {
+; CHECK-LABEL: @icmp_partial_negative_samesign_ult_to_slt(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %add = add nsw i8 %x, -5
+ %cmp = icmp samesign ult i8 %add, -3
+ ret i1 %cmp
+}
+
+define i1 @icmp_pos_samesign_slt_to_ult(i8 range(i8 1, 5) %x) {
+; CHECK-LABEL: @icmp_pos_samesign_slt_to_ult(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult i8 [[X:%.*]], 2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %add = add nsw i8 %x, 1
+ %cmp = icmp samesign slt i8 %add, 3
+ ret i1 %cmp
+}
+
+; Unsigned predicates are given higher priority, so the predicate should not change.
+define i1 @icmp_nuw_nsw_samesign(i32 %arg0) {
+; CHECK-LABEL: @icmp_nuw_nsw_samesign(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ARG0:%.*]], 9
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %v0 = add nuw nsw i32 %arg0, 1
+ %cmp = icmp samesign ult i32 %v0, 10
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
index 653b818..23a278c 100644
--- a/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
@@ -57,11 +57,10 @@ define void @PR33765(i8 %beth) {
; CHECK-LABEL: @PR33765(
; CHECK-NEXT: br i1 false, label [[IF_THEN9:%.*]], label [[IF_THEN9]]
; CHECK: if.then9:
-; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i32
-; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i16
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i16 [[CONV]], [[CONV]]
; CHECK-NEXT: [[TINKY:%.*]] = load i16, ptr @glob, align 2
-; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw i32 [[MUL]] to i16
-; CHECK-NEXT: [[CONV14:%.*]] = and i16 [[TINKY]], [[TMP1]]
+; CHECK-NEXT: [[CONV14:%.*]] = and i16 [[MUL]], [[TINKY]]
; CHECK-NEXT: store i16 [[CONV14]], ptr @glob, align 2
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/InstCombine/icmp-select.ll b/llvm/test/Transforms/InstCombine/icmp-select.ll
index c6c0ba3..c29527a 100644
--- a/llvm/test/Transforms/InstCombine/icmp-select.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-select.ll
@@ -835,3 +835,120 @@ define i1 @discr_eq_constantexpr(ptr %p) {
%cmp = icmp eq i64 %sub, -1
ret i1 %cmp
}
+
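+; Both select arms divide exactly by 1 << 3, so the compare folds to
+; %a == select(%cond, 1, 0), i.e. %a == zext(%cond).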
+define i1 @shl_nsw_eq_simplify_zero_to_self(i8 %a, i1 %cond) {
+; CHECK-LABEL: @shl_nsw_eq_simplify_zero_to_self(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[COND:%.*]] to i8
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %a, 3
+ %sel = select i1 %cond, i8 8, i8 0
+ %cmp = icmp eq i8 %shl, %sel
+ ret i1 %cmp
+}
+
+define i1 @shl_nsw_eq(i8 %a, i1 %cond) {
+; CHECK-LABEL: @shl_nsw_eq(
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND:%.*]], i8 1, i8 -15
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %a, 3
+ %sel = select i1 %cond, i8 8, i8 -120
+ %cmp = icmp eq i8 %shl, %sel
+ ret i1 %cmp
+}
+
+define i1 @shl_nuw_eq(i8 %a, i1 %cond) {
+; CHECK-LABEL: @shl_nuw_eq(
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND:%.*]], i8 1, i8 17
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nuw i8 %a, 3
+ %sel = select i1 %cond, i8 8, i8 -120
+ %cmp = icmp eq i8 %shl, %sel
+ ret i1 %cmp
+}
+
+define i1 @shl_nsw_failed_to_simplify(i8 %a, i1 %cond) {
+; CHECK-LABEL: @shl_nsw_failed_to_simplify(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[A:%.*]], 1
+; CHECK-NEXT: [[NOT_COND:%.*]] = xor i1 [[COND:%.*]], true
+; CHECK-NEXT: [[CMP:%.*]] = select i1 [[NOT_COND]], i1 [[CMP1]], i1 false
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %a, 4
+ %sel = select i1 %cond, i8 8, i8 16
+ %cmp = icmp eq i8 %shl, %sel
+ ret i1 %cmp
+}
+
+define i1 @shl_nuw_failed_to_simplify(i8 %a, i1 %cond) {
+; CHECK-LABEL: @shl_nuw_failed_to_simplify(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[A:%.*]], 4
+; CHECK-NEXT: [[NOT_COND:%.*]] = xor i1 [[COND:%.*]], true
+; CHECK-NEXT: [[CMP:%.*]] = select i1 [[NOT_COND]], i1 [[CMP1]], i1 false
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nuw i8 %a, 3
+ %sel = select i1 %cond, i8 -1, i8 32
+ %cmp = icmp eq i8 %shl, %sel
+ ret i1 %cmp
+}
+
+define i1 @shl_failed_to_simplify(i8 %a, i1 %cond) {
+; CHECK-LABEL: @shl_failed_to_simplify(
+; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[A:%.*]], 3
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], i8 8, i8 32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[SHL]], [[SEL]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i8 %a, 3
+ %sel = select i1 %cond, i8 8, i8 32
+ %cmp = icmp eq i8 %shl, %sel
+ ret i1 %cmp
+}
+
+define i1 @shl_nuw_ne(i8 %a, i8 %b, i8 %c, i1 %cond) {
+; CHECK-LABEL: @shl_nuw_ne(
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND:%.*]], i8 [[B:%.*]], i8 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl_a = shl nuw i8 %a, 3
+ %shl_b = shl nuw i8 %b, 3
+ %sel = select i1 %cond, i8 %shl_b, i8 32
+ %cmp = icmp ne i8 %sel, %shl_a
+ ret i1 %cmp
+}
+
+define i1 @shl_const_phi_failed_to_simplify(i64 %indvars, i32 %conv) {
+; CHECK-LABEL: @shl_const_phi_failed_to_simplify(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i64 [[INDVARS:%.*]], 1
+; CHECK-NEXT: br i1 [[CMP_SLT]], label [[END:%.*]], label [[THEN:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[CONST_PHI:%.*]] = phi i32 [ 0, [[THEN]] ], [ 65535, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[SHL_NUW:%.*]] = shl nuw i32 [[CONV:%.*]], 31
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_SLT]], i32 [[CONST_PHI]], i32 [[SHL_NUW]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SEL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %cmp_slt = icmp slt i64 %indvars, 1
+ br i1 %cmp_slt, label %end, label %then
+
+then:
+ br label %end
+
+end:
+ %const_phi = phi i32 [ 0, %then ], [ 65535, %entry ]
+ %shl_nuw = shl nuw i32 %conv, 31
+ %sel = select i1 %cmp_slt, i32 %const_phi, i32 %shl_nuw
+ %cmp = icmp eq i32 %sel, 0
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll
index 8aceba0..532e8b0 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shr.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll
@@ -579,7 +579,7 @@ define i1 @ashr_ugt_0(i4 %x) {
define i1 @ashr_ugt_0_multiuse(i4 %x, ptr %p) {
; CHECK-LABEL: @ashr_ugt_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[X]], 1
+; CHECK-NEXT: [[R:%.*]] = icmp ne i4 [[S]], 0
; CHECK-NEXT: store i4 [[S]], ptr [[P:%.*]], align 1
; CHECK-NEXT: ret i1 [[R]]
;
@@ -934,7 +934,7 @@ define i1 @lshr_eq_0_multiuse(i8 %x) {
; CHECK-LABEL: @lshr_eq_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 2
; CHECK-NEXT: call void @use(i8 [[S]])
-; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[S]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%s = lshr i8 %x, 2
@@ -947,7 +947,7 @@ define i1 @lshr_ne_0_multiuse(i8 %x) {
; CHECK-LABEL: @lshr_ne_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 2
; CHECK-NEXT: call void @use(i8 [[S]])
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[S]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%s = lshr i8 %x, 2
@@ -960,7 +960,7 @@ define i1 @ashr_eq_0_multiuse(i8 %x) {
; CHECK-LABEL: @ashr_eq_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = ashr i8 [[X:%.*]], 2
; CHECK-NEXT: call void @use(i8 [[S]])
-; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[S]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i8 %x, 2
@@ -973,7 +973,7 @@ define i1 @ashr_ne_0_multiuse(i8 %x) {
; CHECK-LABEL: @ashr_ne_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = ashr i8 [[X:%.*]], 2
; CHECK-NEXT: call void @use(i8 [[S]])
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[S]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i8 %x, 2
@@ -982,6 +982,46 @@ define i1 @ashr_ne_0_multiuse(i8 %x) {
ret i1 %c
}
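+; With a single use, the shift folds into a range check on %x:
+; lshr %x, 2 == 0 iff %x u< 4, and != 0 iff %x u> 3.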
+define i1 @lshr_eq_0(i8 %x) {
+; CHECK-LABEL: @lshr_eq_0(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X:%.*]], 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i8 %x, 2
+ %c = icmp eq i8 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshr_ne_0(i8 %x) {
+; CHECK-LABEL: @lshr_ne_0(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i8 %x, 2
+ %c = icmp ne i8 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashr_eq_0(i8 %x) {
+; CHECK-LABEL: @ashr_eq_0(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X:%.*]], 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i8 %x, 2
+ %c = icmp eq i8 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashr_ne_0(i8 %x) {
+; CHECK-LABEL: @ashr_ne_0(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i8 %x, 2
+ %c = icmp ne i8 %s, 0
+ ret i1 %c
+}
+
define i1 @lshr_exact_eq_0_multiuse(i8 %x) {
; CHECK-LABEL: @lshr_exact_eq_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = lshr exact i8 [[X:%.*]], 2
diff --git a/llvm/test/Transforms/InstCombine/icmp-trunc.ll b/llvm/test/Transforms/InstCombine/icmp-trunc.ll
index ad76ef7..431fc2b 100644
--- a/llvm/test/Transforms/InstCombine/icmp-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-trunc.ll
@@ -407,13 +407,9 @@ define i1 @shl1_trunc_ne0_use2(i37 %a) {
ret i1 %r
}
-; TODO: A > 4
-
define i1 @shl2_trunc_eq0(i9 %a) {
; CHECK-LABEL: @shl2_trunc_eq0(
-; CHECK-NEXT: [[SHL:%.*]] = shl i9 2, [[A:%.*]]
-; CHECK-NEXT: [[T:%.*]] = trunc i9 [[SHL]] to i6
-; CHECK-NEXT: [[R:%.*]] = icmp eq i6 [[T]], 0
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i9 [[A:%.*]], 4
; CHECK-NEXT: ret i1 [[R]]
;
%shl = shl i9 2, %a
@@ -424,9 +420,7 @@ define i1 @shl2_trunc_eq0(i9 %a) {
define i1 @shl2_trunc_ne0(i9 %a) {
; CHECK-LABEL: @shl2_trunc_ne0(
-; CHECK-NEXT: [[SHL:%.*]] = shl i9 2, [[A:%.*]]
-; CHECK-NEXT: [[T:%.*]] = trunc i9 [[SHL]] to i6
-; CHECK-NEXT: [[R:%.*]] = icmp ne i6 [[T]], 0
+; CHECK-NEXT: [[R:%.*]] = icmp ult i9 [[A:%.*]], 5
; CHECK-NEXT: ret i1 [[R]]
;
%shl = shl i9 2, %a
@@ -450,9 +444,7 @@ define i1 @shl3_trunc_eq0(i9 %a) {
define <2 x i1> @shl4_trunc_ne0(<2 x i8> %a) {
; CHECK-LABEL: @shl4_trunc_ne0(
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 4, i8 poison>, [[A:%.*]]
-; CHECK-NEXT: [[T:%.*]] = trunc <2 x i8> [[SHL]] to <2 x i5>
-; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i5> [[T]], zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[A:%.*]], splat (i8 3)
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%shl = shl <2 x i8> <i8 4, i8 poison>, %a
@@ -461,6 +453,16 @@ define <2 x i1> @shl4_trunc_ne0(<2 x i8> %a) {
ret <2 x i1> %r
}
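+; 4 << %a keeps a bit within the low 6 bits only for shift amounts below 4.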
+define i1 @shl5_trunc_ne0(i9 %a) {
+; CHECK-LABEL: @shl5_trunc_ne0(
+; CHECK-NEXT: [[R:%.*]] = icmp ult i9 [[A:%.*]], 4
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %shl = shl i9 4, %a
+ %t = trunc i9 %shl to i6
+ %r = icmp ne i6 %t, 0
+ ret i1 %r
+}
; TODO: A < 5
@@ -507,17 +509,9 @@ define i1 @shl1_trunc_ne32(i8 %a) {
}
define i1 @shl2_trunc_eq8_i32(i32 %a) {
-; DL64-LABEL: @shl2_trunc_eq8_i32(
-; DL64-NEXT: [[SHL:%.*]] = shl i32 2, [[A:%.*]]
-; DL64-NEXT: [[TMP1:%.*]] = and i32 [[SHL]], 65534
-; DL64-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 8
-; DL64-NEXT: ret i1 [[R]]
-;
-; DL8-LABEL: @shl2_trunc_eq8_i32(
-; DL8-NEXT: [[SHL:%.*]] = shl i32 2, [[A:%.*]]
-; DL8-NEXT: [[T:%.*]] = trunc i32 [[SHL]] to i16
-; DL8-NEXT: [[R:%.*]] = icmp eq i16 [[T]], 8
-; DL8-NEXT: ret i1 [[R]]
+; CHECK-LABEL: @shl2_trunc_eq8_i32(
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[A:%.*]], 2
+; CHECK-NEXT: ret i1 [[R]]
;
%shl = shl i32 2, %a
%t = trunc i32 %shl to i16
@@ -526,17 +520,9 @@ define i1 @shl2_trunc_eq8_i32(i32 %a) {
}
define i1 @shl2_trunc_ne8_i32(i32 %a) {
-; DL64-LABEL: @shl2_trunc_ne8_i32(
-; DL64-NEXT: [[SHL:%.*]] = shl i32 2, [[A:%.*]]
-; DL64-NEXT: [[TMP1:%.*]] = and i32 [[SHL]], 65534
-; DL64-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 8
-; DL64-NEXT: ret i1 [[R]]
-;
-; DL8-LABEL: @shl2_trunc_ne8_i32(
-; DL8-NEXT: [[SHL:%.*]] = shl i32 2, [[A:%.*]]
-; DL8-NEXT: [[T:%.*]] = trunc i32 [[SHL]] to i16
-; DL8-NEXT: [[R:%.*]] = icmp ne i16 [[T]], 8
-; DL8-NEXT: ret i1 [[R]]
+; CHECK-LABEL: @shl2_trunc_ne8_i32(
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[A:%.*]], 2
+; CHECK-NEXT: ret i1 [[R]]
;
%shl = shl i32 2, %a
%t = trunc i32 %shl to i16
@@ -544,6 +530,26 @@ define i1 @shl2_trunc_ne8_i32(i32 %a) {
ret i1 %r
}
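+; 128 occupies only bit 7, which the trunc to i6 discards for every shift
+; amount, so these compares fold to constants.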
+define i1 @neg_shl2_trunc_eq0_i8(i8 %a) {
+; CHECK-LABEL: @neg_shl2_trunc_eq0_i8(
+; CHECK-NEXT: ret i1 true
+;
+ %shl = shl i8 128, %a
+ %t = trunc i8 %shl to i6
+ %r = icmp eq i6 %t, 0
+ ret i1 %r
+}
+
+define i1 @neg_shl2_trunc_ne0_i8(i8 %a) {
+; CHECK-LABEL: @neg_shl2_trunc_ne0_i8(
+; CHECK-NEXT: ret i1 false
+;
+ %shl = shl i8 128, %a
+ %t = trunc i8 %shl to i6
+ %r = icmp ne i6 %t, 0
+ ret i1 %r
+}
+
define i1 @shl1_trunc_sgt4(i32 %a) {
; CHECK-LABEL: @shl1_trunc_sgt4(
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[A:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/known-bits-lerp-pattern.ll b/llvm/test/Transforms/InstCombine/known-bits-lerp-pattern.ll
new file mode 100644
index 0000000..5345736
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/known-bits-lerp-pattern.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; Test known-bits refinements for the pattern: a * (b - c) + c * d
+; where a > 0, c > 0, b > 0, d > 0, and b > c.
+; This pattern is a generalization of lerp that appears frequently in graphics operations.
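+; With b == 255 (as in the clamp test below) the sum is bounded by
+; 255*(255 - c) + 255*c == 65025, so it fits in 16 bits: the clamp against
+; 65535 folds away and truncation to i16 is lossless.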
+
+define i32 @test_clamp(i8 %a, i8 %c, i8 %d) {
+; CHECK-LABEL: define i32 @test_clamp(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) {
+; CHECK-NEXT: [[A32:%.*]] = zext i8 [[A]] to i32
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i32
+; CHECK-NEXT: [[D32:%.*]] = zext i8 [[D]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = xor i32 [[C32]], 255
+; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i32 [[SUB]], [[A32]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul nuw nsw i32 [[C32]], [[D32]]
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[MUL1]], [[MUL2]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %a32 = zext i8 %a to i32
+ %c32 = zext i8 %c to i32
+ %d32 = zext i8 %d to i32
+ %sub = sub i32 255, %c32
+ %mul1 = mul i32 %a32, %sub
+ %mul2 = mul i32 %c32, %d32
+ %add = add i32 %mul1, %mul2
+ %cmp = icmp ugt i32 %add, 65535
+ %result = select i1 %cmp, i32 65535, i32 %add
+ ret i32 %result
+}
+
+define i1 @test_trunc_cmp(i8 %a, i8 %c, i8 %d) {
+; CHECK-LABEL: define i1 @test_trunc_cmp(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) {
+; CHECK-NEXT: [[A32:%.*]] = zext i8 [[A]] to i16
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i16
+; CHECK-NEXT: [[D32:%.*]] = zext i8 [[D]] to i16
+; CHECK-NEXT: [[SUB:%.*]] = xor i16 [[C32]], 255
+; CHECK-NEXT: [[MUL1:%.*]] = mul nuw i16 [[SUB]], [[A32]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul nuw i16 [[C32]], [[D32]]
+; CHECK-NEXT: [[ADD:%.*]] = add i16 [[MUL1]], [[MUL2]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[ADD]], 1234
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a32 = zext i8 %a to i32
+ %c32 = zext i8 %c to i32
+ %d32 = zext i8 %d to i32
+ %sub = sub i32 255, %c32
+ %mul1 = mul i32 %a32, %sub
+ %mul2 = mul i32 %c32, %d32
+ %add = add i32 %mul1, %mul2
+ %trunc = trunc i32 %add to i16
+ %cmp = icmp eq i16 %trunc, 1234
+ ret i1 %cmp
+}
+
+define i1 @test_trunc_cmp_xor(i8 %a, i8 %c, i8 %d) {
+; CHECK-LABEL: define i1 @test_trunc_cmp_xor(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) {
+; CHECK-NEXT: [[A32:%.*]] = zext i8 [[A]] to i16
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i16
+; CHECK-NEXT: [[D32:%.*]] = zext i8 [[D]] to i16
+; CHECK-NEXT: [[SUB:%.*]] = xor i16 [[C32]], 255
+; CHECK-NEXT: [[MUL1:%.*]] = mul nuw i16 [[SUB]], [[A32]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul nuw i16 [[C32]], [[D32]]
+; CHECK-NEXT: [[ADD:%.*]] = add i16 [[MUL1]], [[MUL2]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[ADD]], 1234
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a32 = zext i8 %a to i32
+ %c32 = zext i8 %c to i32
+ %d32 = zext i8 %d to i32
+ %sub = xor i32 255, %c32
+ %mul1 = mul i32 %a32, %sub
+ %mul2 = mul i32 %c32, %d32
+ %add = add i32 %mul1, %mul2
+ %trunc = trunc i32 %add to i16
+ %cmp = icmp eq i16 %trunc, 1234
+ ret i1 %cmp
+}
+
+define i1 @test_trunc_cmp_arbitrary_b(i8 %a, i8 %b, i8 %c, i8 %d) {
+; CHECK-LABEL: define i1 @test_trunc_cmp_arbitrary_b(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) {
+; CHECK-NEXT: [[A32:%.*]] = zext i8 [[A]] to i16
+; CHECK-NEXT: [[B32:%.*]] = zext i8 [[B]] to i16
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i16
+; CHECK-NEXT: [[D32:%.*]] = zext i8 [[D]] to i16
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i16 [[B32]], [[C32]]
+; CHECK-NEXT: [[MUL1:%.*]] = mul i16 [[SUB]], [[A32]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul nuw i16 [[C32]], [[D32]]
+; CHECK-NEXT: [[ADD:%.*]] = add i16 [[MUL1]], [[MUL2]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[ADD]], 1234
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a32 = zext i8 %a to i32
+ %b32 = zext i8 %b to i32
+ %c32 = zext i8 %c to i32
+ %d32 = zext i8 %d to i32
+ %sub = sub nsw nuw i32 %b32, %c32
+ %mul1 = mul i32 %a32, %sub
+ %mul2 = mul i32 %c32, %d32
+ %add = add i32 %mul1, %mul2
+ %trunc = trunc i32 %add to i16
+ %cmp = icmp eq i16 %trunc, 1234
+ ret i1 %cmp
+}
+
+define i1 @test_trunc_cmp_no_a(i8 %b, i8 %c, i8 %d) {
+; CHECK-LABEL: define i1 @test_trunc_cmp_no_a(
+; CHECK-SAME: i8 [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) {
+; CHECK-NEXT: [[B32:%.*]] = zext i8 [[B]] to i16
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i16
+; CHECK-NEXT: [[D32:%.*]] = zext i8 [[D]] to i16
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i16 [[B32]], [[C32]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul nuw i16 [[C32]], [[D32]]
+; CHECK-NEXT: [[ADD:%.*]] = add i16 [[SUB]], [[MUL2]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[ADD]], 1234
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %b32 = zext i8 %b to i32
+ %c32 = zext i8 %c to i32
+ %d32 = zext i8 %d to i32
+ %sub = sub nuw i32 %b32, %c32
+ %mul2 = mul i32 %c32, %d32
+ %add = add i32 %sub, %mul2
+ %trunc = trunc i32 %add to i16
+ %cmp = icmp eq i16 %trunc, 1234
+ ret i1 %cmp
+}
+
+define i1 @test_trunc_cmp_no_d(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i1 @test_trunc_cmp_no_d(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[A32:%.*]] = zext i8 [[A]] to i16
+; CHECK-NEXT: [[B32:%.*]] = zext i8 [[B]] to i16
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i16
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i16 [[B32]], [[C32]]
+; CHECK-NEXT: [[MUL1:%.*]] = mul i16 [[SUB]], [[A32]]
+; CHECK-NEXT: [[ADD:%.*]] = add i16 [[MUL1]], [[C32]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[ADD]], 1234
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a32 = zext i8 %a to i32
+ %b32 = zext i8 %b to i32
+ %c32 = zext i8 %c to i32
+ %sub = sub nsw nuw i32 %b32, %c32
+ %mul1 = mul i32 %a32, %sub
+ %add = add i32 %mul1, %c32
+ %trunc = trunc i32 %add to i16
+ %cmp = icmp eq i16 %trunc, 1234
+ ret i1 %cmp
+}
+
+define i1 @test_trunc_cmp_xor_negative(i8 %a, i8 %c, i8 %d) {
+; CHECK-LABEL: define i1 @test_trunc_cmp_xor_negative(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) {
+; CHECK-NEXT: [[A32:%.*]] = zext i8 [[A]] to i16
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i16
+; CHECK-NEXT: [[D32:%.*]] = zext i8 [[D]] to i16
+; CHECK-NEXT: [[SUB:%.*]] = xor i16 [[C32]], 234
+; CHECK-NEXT: [[MUL1:%.*]] = mul nuw i16 [[SUB]], [[A32]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul nuw i16 [[C32]], [[D32]]
+; CHECK-NEXT: [[TRUNC:%.*]] = add i16 [[MUL1]], [[MUL2]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[TRUNC]], 1234
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a32 = zext i8 %a to i32
+ %c32 = zext i8 %c to i32
+ %d32 = zext i8 %d to i32
+ %sub = xor i32 234, %c32
+ %mul1 = mul i32 %a32, %sub
+ %mul2 = mul i32 %c32, %d32
+ %add = add i32 %mul1, %mul2
+ ; We should keep the trunc in this case
+ %trunc = trunc i32 %add to i16
+ %cmp = icmp eq i16 %trunc, 1234
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll
index f8c97d8..da2123a 100644
--- a/llvm/test/Transforms/InstCombine/known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/known-bits.ll
@@ -2425,6 +2425,23 @@ exit:
ret i8 %or2
}
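+; The splatted operand occupies only bits 8..15 while the range attribute
+; confines %rhs to bits 0..7, so the add cannot carry and becomes a disjoint or
+; even for scalable vectors.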
+define <vscale x 4 x i32> @scalable_add_to_disjoint_or(i8 %x, <vscale x 4 x i32> range(i32 0, 256) %rhs) {
+; CHECK-LABEL: @scalable_add_to_disjoint_or(
+; CHECK-NEXT: [[EXTX:%.*]] = zext i8 [[X:%.*]] to i32
+; CHECK-NEXT: [[SHIFT:%.*]] = shl nuw nsw i32 [[EXTX]], 8
+; CHECK-NEXT: [[INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[SHIFT]], i64 0
+; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[ADD:%.*]] = or disjoint <vscale x 4 x i32> [[SPLAT]], [[RHS:%.*]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
+;
+ %extx = zext i8 %x to i32
+ %shift = shl nuw nsw i32 %extx, 8
+ %insert = insertelement <vscale x 4 x i32> poison, i32 %shift, i32 0
+ %splat = shufflevector <vscale x 4 x i32> %insert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %add = add <vscale x 4 x i32> %splat, %rhs
+ ret <vscale x 4 x i32> %add
+}
+
declare void @dummy()
declare void @use(i1)
declare void @sink(i8)
diff --git a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
index 9e0c98b..834d48f 100644
--- a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
@@ -640,14 +640,12 @@ define <2 x i64> @fp_bitcast(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
define <4 x i32> @computesignbits_through_shuffles(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; CHECK-LABEL: @computesignbits_through_shuffles(
; CHECK-NEXT: [[CMP:%.*]] = fcmp ole <4 x float> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
-; CHECK-NEXT: [[SHUF_OR1:%.*]] = or <4 x i32> [[S1]], [[S2]]
-; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; CHECK-NEXT: [[S4:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
-; CHECK-NEXT: [[SHUF_OR2:%.*]] = or <4 x i32> [[S3]], [[S4]]
-; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw <4 x i32> [[SHUF_OR2]] to <4 x i1>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i1> [[TMP4]], [[TMP2]]
+; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: [[S4:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i1> [[S3]], [[S4]]
; CHECK-NEXT: [[SEL_V:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[Z:%.*]], <4 x float> [[X]]
; CHECK-NEXT: [[SEL:%.*]] = bitcast <4 x float> [[SEL_V]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[SEL]]
diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll
index 87e0500..e6de063 100644
--- a/llvm/test/Transforms/InstCombine/logical-select.ll
+++ b/llvm/test/Transforms/InstCombine/logical-select.ll
@@ -676,14 +676,12 @@ define <2 x i64> @fp_bitcast(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
define <4 x i32> @computesignbits_through_shuffles(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; CHECK-LABEL: @computesignbits_through_shuffles(
; CHECK-NEXT: [[CMP:%.*]] = fcmp ole <4 x float> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
-; CHECK-NEXT: [[SHUF_OR1:%.*]] = or <4 x i32> [[S1]], [[S2]]
-; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; CHECK-NEXT: [[S4:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
-; CHECK-NEXT: [[SHUF_OR2:%.*]] = or <4 x i32> [[S3]], [[S4]]
-; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw <4 x i32> [[SHUF_OR2]] to <4 x i1>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i1> [[TMP4]], [[TMP2]]
+; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: [[S4:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i1> [[S3]], [[S4]]
; CHECK-NEXT: [[SEL_V:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[Z:%.*]], <4 x float> [[X]]
; CHECK-NEXT: [[SEL:%.*]] = bitcast <4 x float> [[SEL_V]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[SEL]]
diff --git a/llvm/test/Transforms/InstCombine/modular-format.ll b/llvm/test/Transforms/InstCombine/modular-format.ll
new file mode 100644
index 0000000..d9b7b6f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/modular-format.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Test that the modular format string library call simplifier works correctly.
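+; The "modular-format" attribute used below reads as
+; "<kind>,<format-idx>,<first-arg-idx>,<modular-fn>,<impl-base>[,<aspects>...]".
+; As the tests exercise, a call is redirected to <modular-fn> once some listed
+; aspect is provably unneeded, emitting a reloc.none reference to
+; "<impl-base>_<aspect>" for each listed aspect the call still needs.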
+;
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@.str.int = constant [3 x i8] c"%d\00"
+@.str.float = constant [3 x i8] c"%f\00"
+@.str.multi = constant [6 x i8] c"%f %d\00"
+@.str.noargs = constant [1 x i8] c"\00"
+
+;; No aspects are specified, so no transformation occurs.
+define void @test_basic(i32 %arg) {
+; CHECK-LABEL: @test_basic(
+; CHECK-NEXT: call void (ptr, ...) @basic(ptr nonnull @.str.int, i32 [[ARG:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void (ptr, ...) @basic(ptr @.str.int, i32 %arg)
+ ret void
+}
+
+declare void @basic(ptr, ...) #0
+
+;; The "float" aspect is present and needed, so no transformation occurs.
+define void @test_float_present(double %arg) {
+; CHECK-LABEL: @test_float_present(
+; CHECK-NEXT: call void (ptr, ...) @float_present(ptr nonnull @.str.float, double [[ARG:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void (ptr, ...) @float_present(ptr @.str.float, double %arg)
+ ret void
+}
+
+declare void @float_present(ptr, ...) #1
+
+;; The "float" aspect is present but not needed, so the call is transformed.
+define void @test_float_absent(i32 %arg) {
+; CHECK-LABEL: @test_float_absent(
+; CHECK-NEXT: call void (ptr, ...) @float_present_mod(ptr nonnull @.str.int, i32 [[ARG:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void (ptr, ...) @float_absent(ptr @.str.int, i32 %arg)
+ ret void
+}
+
+declare void @float_absent(ptr, ...) #1
+
+;; Unknown aspects are always considered needed, so no transformation occurs.
+define void @test_unknown_aspects(i32 %arg) {
+; CHECK-LABEL: @test_unknown_aspects(
+; CHECK-NEXT: call void (ptr, ...) @unknown_aspects(ptr nonnull @.str.int, i32 [[ARG:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void (ptr, ...) @unknown_aspects(ptr @.str.int, i32 %arg)
+ ret void
+}
+
+declare void @unknown_aspects(ptr, ...) #2
+
+;; The call has no arguments to check, so the "float" aspect is not needed and
+;; the call is transformed.
+define void @test_no_args_to_check() {
+; CHECK-LABEL: @test_no_args_to_check(
+; CHECK-NEXT: call void (ptr, ...) @float_present_mod(ptr nonnull @.str.noargs)
+; CHECK-NEXT: ret void
+;
+ call void (ptr, ...) @no_args_to_check(ptr @.str.noargs)
+ ret void
+}
+
+declare void @no_args_to_check(ptr, ...) #1
+
+;; The first argument index is not 2. The "float" aspect is needed, so no
+;; transformation occurs.
+define void @test_first_arg_idx(i32 %ignored, double %arg) {
+; CHECK-LABEL: @test_first_arg_idx(
+; CHECK-NEXT: call void (i32, ptr, ...) @first_arg_idx(i32 [[IGNORED:%.*]], ptr nonnull @.str.float, double [[ARG:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void (i32, ptr, ...) @first_arg_idx(i32 %ignored, ptr @.str.float, double %arg)
+ ret void
+}
+
+declare void @first_arg_idx(i32, ptr, ...) #3
+
+;; One aspect ("unknown") is needed, but one ("float") is not. The call is
+;; transformed, and a reference to the needed aspect is emitted.
+define void @test_partial_aspects(i32 %arg) {
+; CHECK-LABEL: @test_partial_aspects(
+; CHECK-NEXT: call void (ptr, ...) @multiple_aspects_mod(ptr nonnull @.str.int, i32 [[ARG:%.*]])
+; CHECK-NEXT: call void @llvm.reloc.none(metadata !"basic_impl_unknown")
+; CHECK-NEXT: ret void
+;
+ call void (ptr, ...) @partial_aspects(ptr @.str.int, i32 %arg)
+ ret void
+}
+
+declare void @partial_aspects(ptr, ...) #4
+
+attributes #0 = { "modular-format"="printf,1,2,basic_mod,basic_impl" }
+attributes #1 = { "modular-format"="printf,1,2,float_present_mod,basic_impl,float" }
+attributes #2 = { "modular-format"="printf,1,2,unknown_aspects_mod,basic_impl,unknown1,unknown2" }
+attributes #3 = { "modular-format"="printf,2,3,first_arg_idx_mod,basic_impl,float" }
+attributes #4 = { "modular-format"="printf,1,2,multiple_aspects_mod,basic_impl,float,unknown" }
diff --git a/llvm/test/Transforms/InstCombine/not.ll b/llvm/test/Transforms/InstCombine/not.ll
index d693b9d..1acf55a 100644
--- a/llvm/test/Transforms/InstCombine/not.ll
+++ b/llvm/test/Transforms/InstCombine/not.ll
@@ -1061,3 +1061,81 @@ if.else:
call void @f2()
unreachable
}
+
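+; not (bitcast <8 x i1> %cmp to i8) folds by inverting the compare predicate
+; instead, provided both the compare and the bitcast are single-use.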
+define i8 @invert_bitcasted_icmp(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: @invert_bitcasted_icmp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <8 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[MASK_AS_INT:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT: ret i8 [[MASK_AS_INT]]
+;
+ %cmp = icmp sle <8 x i32> %a, %b
+ %mask.as.int = bitcast <8 x i1> %cmp to i8
+ %not = xor i8 %mask.as.int, 255
+ ret i8 %not
+}
+
+define i8 @invert_bitcasted_icmp_samesign(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: @invert_bitcasted_icmp_samesign(
+; CHECK-NEXT: [[CMP:%.*]] = icmp samesign sgt <8 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[MASK_AS_INT:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT: ret i8 [[MASK_AS_INT]]
+;
+ %cmp = icmp samesign sle <8 x i32> %a, %b
+ %mask.as.int = bitcast <8 x i1> %cmp to i8
+ %not = xor i8 %mask.as.int, 255
+ ret i8 %not
+}
+
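+; Negative test: an extra use of the compare blocks the fold.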
+define i8 @invert_bitcasted_icmp_multi_use_1(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: @invert_bitcasted_icmp_multi_use_1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle <8 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: call void (...) @llvm.fake.use(<8 x i1> [[CMP]])
+; CHECK-NEXT: [[MASK_AS_INT:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[MASK_AS_INT]], -1
+; CHECK-NEXT: ret i8 [[NOT]]
+;
+ %cmp = icmp sle <8 x i32> %a, %b
+ call void (...) @llvm.fake.use(<8 x i1> %cmp)
+ %mask.as.int = bitcast <8 x i1> %cmp to i8
+ %not = xor i8 %mask.as.int, -1
+ ret i8 %not
+}
+
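+; Negative test: an extra use of the bitcast blocks the fold.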
+define i8 @invert_bitcasted_icmp_multi_use_2(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: @invert_bitcasted_icmp_multi_use_2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle <8 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[MASK_AS_INT:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT: call void (...) @llvm.fake.use(i8 [[MASK_AS_INT]])
+; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[MASK_AS_INT]], -1
+; CHECK-NEXT: ret i8 [[NOT]]
+;
+ %cmp = icmp sle <8 x i32> %a, %b
+ %mask.as.int = bitcast <8 x i1> %cmp to i8
+ call void (...) @llvm.fake.use(i8 %mask.as.int)
+ %not = xor i8 %mask.as.int, -1
+ ret i8 %not
+}
+
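+; The same fold applies to fcmp; the inverse of olt is uge.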
+define i8 @invert_bitcasted_fcmp(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: @invert_bitcasted_fcmp(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp uge <8 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[MASK_AS_INT:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT: ret i8 [[MASK_AS_INT]]
+;
+ %cmp = fcmp olt <8 x float> %a, %b
+ %mask.as.int = bitcast <8 x i1> %cmp to i8
+ %not = xor i8 %mask.as.int, 255
+ ret i8 %not
+}
+
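+; Fast-math flags are preserved on the inverted compare.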
+define i8 @invert_bitcasted_fcmp_fast(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: @invert_bitcasted_fcmp_fast(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp fast uge <8 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[MASK_AS_INT:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT: ret i8 [[MASK_AS_INT]]
+;
+ %cmp = fcmp fast olt <8 x float> %a, %b
+ %mask.as.int = bitcast <8 x i1> %cmp to i8
+ %not = xor i8 %mask.as.int, 255
+ ret i8 %not
+}
diff --git a/llvm/test/Transforms/InstCombine/or-select-zero-icmp.ll b/llvm/test/Transforms/InstCombine/or-select-zero-icmp.ll
new file mode 100644
index 0000000..a3b21cc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/or-select-zero-icmp.ll
@@ -0,0 +1,169 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; Basic test: (A == 0 ? B : 0) | A --> A == 0 ? B : A
+define i32 @basic(i32 %a, i32 %b) {
+; CHECK-LABEL: @basic(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[B:%.*]], i32 [[A]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %cmp = icmp eq i32 %a, 0
+ %sel = select i1 %cmp, i32 %b, i32 0
+ %or = or i32 %sel, %a
+ ret i32 %or
+}
+
+; Operand order swap test
+define i32 @swap_operand_order(i32 %x, i32 %y) {
+; CHECK-LABEL: @swap_operand_order(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[Y:%.*]], i32 0
+; CHECK-NEXT: [[RES:%.*]] = or i32 [[X]], [[SEL]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %cmp = icmp eq i32 %x, 0
+ %sel = select i1 %cmp, i32 %y, i32 0
+ %or = or i32 %x, %sel
+ ret i32 %or
+}
+
+; Negative test: Non-zero false value in select
+define i32 @negative_non_zero_false_val(i32 %a, i32 %b) {
+; CHECK-LABEL: @negative_non_zero_false_val(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[B:%.*]], i32 1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL]], [[A]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %cmp = icmp eq i32 %a, 0
+ %sel = select i1 %cmp, i32 %b, i32 1
+ %or = or i32 %sel, %a
+ ret i32 %or
+}
+
+; Negative test: Incorrect comparison predicate (NE)
+define i32 @negative_wrong_predicate(i32 %a, i32 %b) {
+; CHECK-LABEL: @negative_wrong_predicate(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[CMP]], i32 0, i32 [[TMP1:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[OR]], [[A]]
+; CHECK-NEXT: ret i32 [[OR1]]
+;
+ %cmp = icmp ne i32 %a, 0
+ %sel = select i1 %cmp, i32 %b, i32 0
+ %or = or i32 %sel, %a
+ ret i32 %or
+}
+
+; Comparison direction swap test (0 == X)
+define i32 @cmp_swapped(i32 %x, i32 %y) {
+; CHECK-LABEL: @cmp_swapped(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[Y:%.*]], i32 0
+; CHECK-NEXT: [[RES:%.*]] = or i32 [[X]], [[SEL]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %cmp = icmp eq i32 0, %x
+ %sel = select i1 %cmp, i32 %y, i32 0
+ %or = or i32 %x, %sel
+ ret i32 %or
+}
+
+; Complex expression test: the fold also applies when the compared value is computed.
+define i32 @complex_expression(i32 %a, i32 %b) {
+; CHECK-LABEL: @complex_expression(
+; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[B:%.*]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %x = add i32 %a, 1
+ %cmp = icmp eq i32 %x, 0
+ %sel = select i1 %cmp, i32 %b, i32 0
+ %or = or i32 %sel, %x
+ ret i32 %or
+}
+
+; zext test: the or operand is zext(A) rather than the compared value itself (no fold).
+define i32 @zext_cond(i8 %a, i32 %b) {
+; CHECK-LABEL: @zext_cond(
+; CHECK-NEXT: [[Z:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[B:%.*]], i32 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL]], [[Z]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %z = zext i8 %a to i32
+ %cmp = icmp eq i8 %a, 0
+ %sel = select i1 %cmp, i32 %b, i32 0
+ %or = or i32 %sel, %z
+ ret i32 %or
+}
+
+; sext test: the or operand is sext(A) rather than the compared value itself (no fold).
+define i32 @sext_cond(i8 %a, i32 %b) {
+; CHECK-LABEL: @sext_cond(
+; CHECK-NEXT: [[S:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[B:%.*]], i32 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL]], [[S]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %s = sext i8 %a to i32
+ %cmp = icmp eq i8 %a, 0
+ %sel = select i1 %cmp, i32 %b, i32 0
+ %or = or i32 %sel, %s
+ ret i32 %or
+}
+
+; Vector type test
+define <2 x i32> @vector_type(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @vector_type(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[B:%.*]], <2 x i32> [[A]]
+; CHECK-NEXT: ret <2 x i32> [[RES]]
+;
+ %cmp = icmp eq <2 x i32> %a, zeroinitializer
+ %sel = select <2 x i1> %cmp, <2 x i32> %b, <2 x i32> zeroinitializer
+ %or = or <2 x i32> %sel, %a
+ ret <2 x i32> %or
+}
+
+; Pointer type test (should not trigger optimization)
+define ptr @pointer_type(ptr %p, ptr %q) {
+; CHECK-LABEL: @pointer_type(
+; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], null
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], ptr [[Q:%.*]], ptr null
+; CHECK-NEXT: [[SEL_INT:%.*]] = ptrtoint ptr [[SEL]] to i64
+; CHECK-NEXT: [[OR:%.*]] = or i64 [[A]], [[SEL_INT]]
+; CHECK-NEXT: [[RET:%.*]] = inttoptr i64 [[OR]] to ptr
+; CHECK-NEXT: ret ptr [[RET]]
+;
+ %a = ptrtoint ptr %p to i64
+ %cmp = icmp eq i64 %a, 0
+ %sel = select i1 %cmp, ptr %q, ptr null
+ %sel_int = ptrtoint ptr %sel to i64
+ %or_val = or i64 %a, %sel_int
+ %ret = inttoptr i64 %or_val to ptr
+ ret ptr %ret
+}
+
+; Multi-use test (should not trigger optimization)
+define i32 @multi_use_test(i32 %x, i32 %m) {
+; CHECK-LABEL: @multi_use_test(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[M:%.*]], i32 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL]], [[X]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL]], [[X]]
+; CHECK-NEXT: [[O2:%.*]] = sub i32 [[OR]], [[ADD]]
+; CHECK-NEXT: ret i32 [[O2]]
+;
+ %cmp = icmp eq i32 %x, 0
+ %sel = select i1 %cmp, i32 %m, i32 0
+ %or = or i32 %sel, %x
+ %add = add i32 %sel, %x
+ %res = sub i32 %or, %add
+ ret i32 %res
+}
diff --git a/llvm/test/Transforms/InstCombine/ptrauth-intrinsics.ll b/llvm/test/Transforms/InstCombine/ptrauth-intrinsics.ll
index 208e162..22c330f 100644
--- a/llvm/test/Transforms/InstCombine/ptrauth-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/ptrauth-intrinsics.ll
@@ -160,6 +160,43 @@ define i64 @test_ptrauth_resign_ptrauth_constant(ptr %p) {
ret i64 %authed
}
+@ds = external global i8
+
+define i64 @test_ptrauth_nop_ds1(ptr %p) {
+; CHECK-LABEL: @test_ptrauth_nop_ds1(
+; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT: [[SIGNED:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[TMP0]], i32 1, i64 1234) [ "deactivation-symbol"(ptr @ds) ]
+; CHECK-NEXT: [[AUTHED:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[SIGNED]], i32 1, i64 1234)
+; CHECK-NEXT: ret i64 [[AUTHED]]
+;
+ %tmp0 = ptrtoint ptr %p to i64
+ %signed = call i64 @llvm.ptrauth.sign(i64 %tmp0, i32 1, i64 1234) [ "deactivation-symbol"(ptr @ds) ]
+ %authed = call i64 @llvm.ptrauth.auth(i64 %signed, i32 1, i64 1234)
+ ret i64 %authed
+}
+
+define i64 @test_ptrauth_nop_ds2(ptr %p) {
+; CHECK-LABEL: @test_ptrauth_nop_ds2(
+; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT: [[SIGNED:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[TMP0]], i32 1, i64 1234)
+; CHECK-NEXT: [[AUTHED:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[SIGNED]], i32 1, i64 1234) [ "deactivation-symbol"(ptr @ds) ]
+; CHECK-NEXT: ret i64 [[AUTHED]]
+;
+ %tmp0 = ptrtoint ptr %p to i64
+ %signed = call i64 @llvm.ptrauth.sign(i64 %tmp0, i32 1, i64 1234)
+ %authed = call i64 @llvm.ptrauth.auth(i64 %signed, i32 1, i64 1234) [ "deactivation-symbol"(ptr @ds) ]
+ ret i64 %authed
+}
+
+define i64 @test_ptrauth_nop_ds_constant() {
+; CHECK-LABEL: @test_ptrauth_nop_ds_constant(
+; CHECK-NEXT: [[AUTHED:%.*]] = call i64 @llvm.ptrauth.auth(i64 ptrtoint (ptr ptrauth (ptr @foo, i32 1, i64 1234, ptr null, ptr @ds) to i64), i32 1, i64 1234)
+; CHECK-NEXT: ret i64 [[AUTHED]]
+;
+ %authed = call i64 @llvm.ptrauth.auth(i64 ptrtoint(ptr ptrauth(ptr @foo, i32 1, i64 1234, ptr null, ptr @ds) to i64), i32 1, i64 1234)
+ ret i64 %authed
+}
+
declare i64 @llvm.ptrauth.auth(i64, i32, i64)
declare i64 @llvm.ptrauth.sign(i64, i32, i64)
declare i64 @llvm.ptrauth.resign(i64, i32, i64, i32, i64)
diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
index adf3aa1..f33fa50 100644
--- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
+++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
@@ -206,7 +206,7 @@ define ptr @gep_sub_ptrtoaddr_different_obj(ptr %p, ptr %p2, ptr %p3) {
ret ptr %gep
}
-; The use in ptrtoaddr should be replaced. The uses in ptrtoint and icmp should
+; The uses in ptrtoaddr and icmp should be replaced. The use in ptrtoint should
; not be replaced, as the non-address bits differ. The use in the return value
; should not be replaced as the provenance differs.
define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2, ptr addrspace(1) %p3) {
@@ -216,7 +216,7 @@ define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize(ptr addrspace(
; CHECK-NEXT: [[P2_ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P2]] to i32
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[P2_ADDR]], [[P_ADDR]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspace(1) [[P]], i32 [[SUB]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[GEP]], [[P3]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[P2]], [[P3]]
; CHECK-NEXT: call void @use.i1(i1 [[CMP]])
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i64
; CHECK-NEXT: [[INT:%.*]] = trunc i64 [[TMP1]] to i32
diff --git a/llvm/test/Transforms/InstCombine/recurrence.ll b/llvm/test/Transforms/InstCombine/recurrence.ll
index f75e0d4..643e7efc 100644
--- a/llvm/test/Transforms/InstCombine/recurrence.ll
+++ b/llvm/test/Transforms/InstCombine/recurrence.ll
@@ -24,9 +24,9 @@ loop: ; preds = %loop, %entry
define i64 @test_or2(i64 %a, i64 %b) {
; CHECK-LABEL: @test_or2(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IV_NEXT:%.*]] = or i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV_NEXT:%.*]] = or i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: tail call void @use(i64 [[IV_NEXT]])
; CHECK-NEXT: br label [[LOOP]]
;
@@ -104,9 +104,9 @@ loop: ; preds = %loop, %entry
define i64 @test_and2(i64 %a, i64 %b) {
; CHECK-LABEL: @test_and2(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IV_NEXT:%.*]] = and i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV_NEXT:%.*]] = and i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: tail call void @use(i64 [[IV_NEXT]])
; CHECK-NEXT: br label [[LOOP]]
;
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index cfd679c..1294f86 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -2351,3 +2351,339 @@ define i8 @fold_add_umax_to_usub_multiuse(i8 %a) {
}
declare void @usei8(i8)
+
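+; (X >=s 127 ? 127 : X + Y) with a variable Y is not a saturating add; only
+; the compare is canonicalized to an equality.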
+define i8 @sadd_sat_uge_int_max(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_uge_int_max(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %cmp = icmp sge i8 %x, 127
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
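+; X >s 127 is always false for i8, so the select always yields the add.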
+define i8 @sadd_sat_ugt_int_max(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_ugt_int_max(
+; CHECK-NEXT: [[R:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %cmp = icmp sgt i8 %x, 127
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
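+; (X == 127 ? 127 : X + 1) is exactly sadd.sat(X, 1).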
+define i8 @sadd_sat_eq_int_max(i8 %x) {
+; CHECK-LABEL: @sadd_sat_eq_int_max(
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 1)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %cmp = icmp eq i8 %x, 127
+ %add = add i8 %x, 1
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
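+; X + 10 overflows i8 exactly when X >=s 118, so this is sadd.sat(X, 10).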
+define i8 @sadd_sat_constant(i8 %x) {
+; CHECK-LABEL: @sadd_sat_constant(
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %cmp = icmp sge i8 %x, 118
+ %add = add i8 %x, 10
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
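+; Negative test: with a variable addend there is no fixed saturation threshold.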
+define i8 @sadd_sat_negative_no_fold(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_negative_no_fold(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %cmp = icmp sge i8 %x, 127
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
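+; Negative test: the predicate picks the saturated value on the wrong side of
+; the threshold.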
+define i8 @sadd_sat_wrong_predicate(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_wrong_predicate(
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y:%.*]], 127
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %cmp = icmp slt i8 %x, 127
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
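+; Negative test: the compare threshold (126) does not match the saturation bound.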
+define i8 @sadd_sat_wrong_constant(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_wrong_constant(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], 125
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %cmp = icmp sge i8 %x, 126
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
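+; Vector variant of the variable-addend case: no fold, only compare canonicalization.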
+define <2 x i8> @sadd_sat_vector(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @sadd_sat_vector(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 127)
+; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 127), <2 x i8> [[ADD]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %cmp = icmp sge <2 x i8> %x, <i8 127, i8 127>
+ %add = add <2 x i8> %x, %y
+ %r = select <2 x i1> %cmp, <2 x i8> <i8 127, i8 127>, <2 x i8> %add
+ ret <2 x i8> %r
+}
+
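+; With constant addends the vector case folds to a clamp: smin(X, 127 - C) + C.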
+define <2 x i8> @sadd_sat_vector_constant(<2 x i8> %x) {
+; CHECK-LABEL: @sadd_sat_vector_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 117, i8 107>)
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], <i8 10, i8 20>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %cmp = icmp sge <2 x i8> %x, <i8 118, i8 108>
+ %add = add <2 x i8> %x, <i8 10, i8 20>
+ %r = select <2 x i1> %cmp, <2 x i8> <i8 127, i8 127>, <2 x i8> %add
+ ret <2 x i8> %r
+}
+
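+; Without nsw, (127 - X) may wrap (e.g. for X = -128), so the compare is not
+; a reliable overflow check and the fold must not fire.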
+define i8 @sadd_sat_int_max_minus_x(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x(
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sub = sub i8 127, %x
+ %cmp = icmp slt i8 %sub, %y
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_commuted(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_commuted(
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[SUB]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sub = sub i8 127, %x
+ %cmp = icmp sgt i8 %y, %sub
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_nonstrict(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_nonstrict(
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp sgt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sub = sub i8 127, %x
+ %cmp = icmp sle i8 %sub, %y
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_commuted_nonstrict(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_commuted_nonstrict(
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp slt i8 [[Y:%.*]], [[SUB]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sub = sub i8 127, %x
+ %cmp = icmp sge i8 %y, %sub
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_wrong_constant(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_wrong_constant(
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 126, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sub = sub i8 126, %x
+ %cmp = icmp slt i8 %sub, %y
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_wrong_predicate(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_wrong_predicate(
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sub = sub i8 127, %x
+ %cmp = icmp sgt i8 %sub, %y
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
+define <2 x i8> @sadd_sat_int_max_minus_x_vector(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_vector(
+; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i8> splat (i8 127), [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[SUB]], [[Y:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 127), <2 x i8> [[ADD]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %sub = sub <2 x i8> <i8 127, i8 127>, %x
+ %cmp = icmp slt <2 x i8> %sub, %y
+ %add = add <2 x i8> %x, %y
+ %r = select <2 x i1> %cmp, <2 x i8> <i8 127, i8 127>, <2 x i8> %add
+ ret <2 x i8> %r
+}
+
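+; Commuted select and add operand variants of the variable-addend case.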
+define i8 @sadd_sat_commuted_select(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_commuted_select(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y:%.*]], 127
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 127
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %cmp = icmp sge i8 %x, 127
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 %add, i8 127
+ ret i8 %r
+}
+
+define i8 @sadd_sat_commuted_add(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_commuted_add(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y:%.*]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %cmp = icmp sge i8 %x, 127
+ %add = add i8 %y, %x
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
+define i8 @sadd_sat_commuted_both(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_commuted_both(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y:%.*]], 127
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 127
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %cmp = icmp sge i8 %x, 127
+ %add = add i8 %y, %x
+ %r = select i1 %cmp, i8 %add, i8 127
+ ret i8 %r
+}
+
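+; With nsw on (127 - X), the compare (127 - X <s Y) is exactly the signed
+; overflow check for X + Y, so this folds to sadd.sat(X, Y).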
+define i8 @sadd_sat_int_max_minus_x_nsw_slt(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_nsw_slt(
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sub = sub nsw i8 127, %x
+ %cmp = icmp slt i8 %sub, %y
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_nsw_sge_commuted(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_nsw_sge_commuted(
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sub = sub nsw i8 127, %x
+ %cmp = icmp sge i8 %y, %sub
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_no_nsw_neg(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_no_nsw_neg(
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sub = sub i8 127, %x
+ %cmp = icmp slt i8 %sub, %y
+ %add = add i8 %x, %y
+ %r = select i1 %cmp, i8 127, i8 %add
+ ret i8 %r
+}
+
+define i8 @neg_no_nsw(i8 %x, i8 %y) {
+; CHECK-LABEL: @neg_no_nsw(
+; CHECK-NEXT: [[ADD:%.*]] = sub i8 127, [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], [[ADD]]
+; CHECK-NEXT: [[D:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i8 127, i8 [[D]]
+; CHECK-NEXT: ret i8 [[S]]
+;
+ %add = sub i8 127, %y
+ %cmp = icmp sgt i8 %x, %add
+ %d = add i8 %x, %y
+ %s = select i1 %cmp, i8 127, i8 %d
+ ret i8 %s
+}
+
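+; Adding -128 just flips the sign bit, so this reduces to smin(X, -1) & 127.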
+define i8 @neg_neg_constant(i8 %x, i8 %y) {
+; CHECK-LABEL: @neg_neg_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 -1)
+; CHECK-NEXT: [[S:%.*]] = and i8 [[TMP1]], 127
+; CHECK-NEXT: ret i8 [[S]]
+;
+ %cmp = icmp sgt i8 %x, -2
+ %d = add i8 %x, -128
+ %s = select i1 %cmp, i8 127, i8 %d
+ ret i8 %s
+}
+
+; Make sure we don't crash in this case.
+define i32 @pr153053_strict_pred_with_nonconstant_rhs(i32 %x, i32 %y) {
+; CHECK-LABEL: @pr153053_strict_pred_with_nonconstant_rhs(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 1
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 2147483647
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ %cmp = icmp slt i32 %x, %y
+ %add = add i32 %x, 1
+ %res = select i1 %cmp, i32 %add, i32 2147483647
+ ret i32 %res
+}
diff --git a/llvm/test/Transforms/InstCombine/scalarization.ll b/llvm/test/Transforms/InstCombine/scalarization.ll
index a6931b4..c4adf75 100644
--- a/llvm/test/Transforms/InstCombine/scalarization.ll
+++ b/llvm/test/Transforms/InstCombine/scalarization.ll
@@ -108,6 +108,50 @@ for.end:
ret void
}
+define void @scalarize_phi_sub(ptr %n, ptr %inout) {
+;
+; CHECK-LABEL: @scalarize_phi_sub(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[T0:%.*]] = load volatile float, ptr [[INOUT:%.*]], align 4
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[T1:%.*]] = load i32, ptr [[N:%.*]], align 4
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[T1]]
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK: for.body:
+; CHECK-NEXT: store volatile float [[TMP0]], ptr [[INOUT]], align 4
+; CHECK-NEXT: [[TMP1]] = fsub float 0.000000e+00, [[TMP0]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label [[FOR_COND]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %t0 = load volatile float, ptr %inout, align 4
+ %insert = insertelement <4 x float> poison, float %t0, i32 0
+ %splat = shufflevector <4 x float> %insert, <4 x float> poison, <4 x i32> zeroinitializer
+ br label %for.cond
+
+for.cond:
+ %x.0 = phi <4 x float> [ %splat, %entry ], [ %sub, %for.body ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %t1 = load i32, ptr %n, align 4
+ %cmp = icmp ne i32 %i.0, %t1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %t2 = extractelement <4 x float> %x.0, i32 1
+ store volatile float %t2, ptr %inout, align 4
+ %sub = fsub <4 x float> zeroinitializer, %x.0
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret void
+}
+
define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
;
; CHECK-LABEL: @extract_element_binop_splat_constant_index(
diff --git a/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll
index bc988a9..d783cbf 100644
--- a/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll
+++ b/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll
@@ -263,5 +263,5 @@ define i1 @neg_icmp_eq_implies_trunc(i8 %x, i1 %c) {
!1 = !{!"branch_weights", i32 2, i32 3}
;.
; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
-; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+; CHECK: [[PROF1]] = !{!"unknown", !"instcombine"}
;.
diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
index df60078..a7ff967 100644
--- a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
@@ -10,6 +10,8 @@ declare float @llvm.trunc.f32(float)
declare float @llvm.arithmetic.fence.f32(float)
declare float @llvm.minnum.f32(float, float)
declare float @llvm.maxnum.f32(float, float)
+declare float @llvm.minimumnum.f32(float, float)
+declare float @llvm.maximumnum.f32(float, float)
define float @ninf_user_select_inf(i1 %cond, float %x, float %y) {
@@ -1314,7 +1316,7 @@ define nofpclass(pinf) float @ret_nofpclass_pinf__minnum_ninf(i1 %cond, float %x
; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) {
; CHECK-NEXT: ret float 0xFFF0000000000000
;
- %min = call float @llvm.minnum.f32(float %x, float 0xFFF0000000000000)
+ %min = call float @llvm.minimumnum.f32(float %x, float 0xFFF0000000000000)
ret float %min
}
@@ -1335,6 +1337,6 @@ define nofpclass(ninf) float @ret_nofpclass_ninf__maxnum_pinf(i1 %cond, float %x
; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) {
; CHECK-NEXT: ret float 0x7FF0000000000000
;
- %max = call float @llvm.maxnum.f32(float %x, float 0x7FF0000000000000)
+ %max = call float @llvm.maximumnum.f32(float %x, float 0x7FF0000000000000)
ret float %max
}
diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll
index 5a4fb04..2765c75 100644
--- a/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll
@@ -610,6 +610,16 @@ define void @size_returning_aligned_update_test() {
ret void
}
+;; Check that !alloc_token is preserved.
+; HOTCOLD-LABEL: @new_alloc_token()
+define void @new_alloc_token() {
+ ;; Attribute cold converted to __hot_cold_t cold value.
+ ; HOTCOLD: @_Znwm12__hot_cold_t(i64 10, i8 [[COLD]]), !alloc_token ![[ALLOC_TOKEN:[0-9]+]]
+ %call = call ptr @_Znwm(i64 10) #0, !alloc_token !0
+ call void @dummy(ptr %call)
+ ret void
+}
+
;; So that instcombine doesn't optimize out the call.
declare void @dummy(ptr)
@@ -649,3 +659,6 @@ attributes #5 = { "memprof" = "hot" }
attributes #8 = { "memprof" = "ambiguous" }
attributes #6 = { nobuiltin allocsize(0) "memprof"="cold" }
+
+; CHECK: [[ALLOC_TOKEN]] = !{!"MyType", i1 false}
+!0 = !{!"MyType", i1 false}
diff --git a/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll b/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll
new file mode 100644
index 0000000..8ceb310
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -p instcombine -S %s | FileCheck %s
+
+define i64 @test_dereferenceable_assume(ptr %p, ptr %q, i1 %c.0) {
+; CHECK-LABEL: define i64 @test_dereferenceable_assume(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ]
+; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: ret i64 [[DIFF]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: ret i64 0
+;
+entry:
+ %p_int = ptrtoint ptr %p to i64
+ %q_int = ptrtoint ptr %q to i64
+ %diff = sub i64 %q_int, %p_int
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
+ br i1 %c.0, label %then, label %else
+
+then:
+ ret i64 %diff
+
+else:
+ ret i64 0
+}
+
+define i64 @test_sink_with_dereferenceable_assume_same_block_as_user(ptr %p, ptr %q, i1 %c.0) {
+; CHECK-LABEL: define i64 @test_sink_with_dereferenceable_assume_same_block_as_user(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ]
+; CHECK-NEXT: ret i64 [[DIFF]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: ret i64 0
+;
+entry:
+ %p_int = ptrtoint ptr %p to i64
+ %q_int = ptrtoint ptr %q to i64
+ %diff = sub i64 %q_int, %p_int
+ br i1 %c.0, label %then, label %else
+
+then:
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
+ ret i64 %diff
+
+else:
+ ret i64 0
+}
+
+define i64 @test_sink_with_multiple_users_dominated_by_deref(ptr %p, ptr %q, i1 %c.0, i1 %c.1) {
+; CHECK-LABEL: define i64 @test_sink_with_multiple_users_dominated_by_deref(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]], i1 [[C_1:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]]
+; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ]
+; CHECK-NEXT: br i1 [[C_1]], label %[[THEN_2:.*]], label %[[ELSE]]
+; CHECK: [[THEN_2]]:
+; CHECK-NEXT: [[DOUBLED:%.*]] = shl i64 [[DIFF]], 1
+; CHECK-NEXT: ret i64 [[DOUBLED]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: ret i64 0
+;
+entry:
+ %p_int = ptrtoint ptr %p to i64
+ %q_int = ptrtoint ptr %q to i64
+ %diff = sub i64 %q_int, %p_int
+ br i1 %c.0, label %then, label %else
+
+then:
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
+ br i1 %c.1, label %then.2, label %else
+
+then.2:
+ %doubled = mul i64 %diff, 2
+ ret i64 %doubled
+
+else:
+ ret i64 0
+}
+
+define i64 @test_deref_user_does_not_dominate_other_user(ptr %p, ptr %q, i1 %c.0) {
+; CHECK-LABEL: define i64 @test_deref_user_does_not_dominate_other_user(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]]
+; CHECK-NEXT: br i1 [[C_0]], label %[[MIDDLE:.*]], label %[[EXIT:.*]]
+; CHECK: [[MIDDLE]]:
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i64 [[DIFF]]
+;
+entry:
+ %p_int = ptrtoint ptr %p to i64
+ %q_int = ptrtoint ptr %q to i64
+ %diff = sub i64 %q_int, %p_int
+ br i1 %c.0, label %middle, label %exit
+
+middle:
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
+ br label %exit
+
+exit:
+ ret i64 %diff
+}
+
+declare void @llvm.assume(i1 noundef)
diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll
index ee70137..01da63f 100644
--- a/llvm/test/Transforms/InstCombine/sub-gep.ll
+++ b/llvm/test/Transforms/InstCombine/sub-gep.ll
@@ -858,8 +858,7 @@ define i1 @_gep_phi2(ptr %str1, i64 %val2) {
; CHECK: while.end.i:
; CHECK-NEXT: br label [[_Z3FOOPKC_EXIT]]
; CHECK: _Z3fooPKc.exit:
-; CHECK-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 1, [[WHILE_END_I]] ], [ 0, [[LOR_LHS_FALSE_I]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[RETVAL_0_I]], [[VAL2:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 1, [[WHILE_END_I]] ], [ [[VAL2:%.*]], [[LOR_LHS_FALSE_I]] ], [ [[VAL2]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[TMP2]], 0
; CHECK-NEXT: ret i1 [[TOBOOL]]
;