10 files changed, 818 insertions, 6 deletions
diff --git a/llvm/test/Analysis/CostModel/AArch64/extract-last-active.ll b/llvm/test/Analysis/CostModel/AArch64/extract-last-active.ll
new file mode 100644
index 0000000..9efcf91
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/extract-last-active.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=NEON
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=SVE
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sme -force-streaming | FileCheck %s --check-prefix=SME-STREAMING
+
+define void @extractions() {
+; NEON-LABEL: 'extractions'
+; NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
+; NEON-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SVE-LABEL: 'extractions'
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SME-STREAMING-LABEL: 'extractions'
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
+; SME-STREAMING-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+
+  ;; Legal types
+  %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
+  %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
+  %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+  %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
+  %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
+  %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
+  %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
+  %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
+  %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
+  %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
+  %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
+  %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
+  %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
+  %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
+  %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
+  %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
+
+  ;; Wider-than-legal
+  %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
+  %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
+  %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
+  %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
+  %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
+  %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
+  %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
+  %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
+  %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
+  %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
+  %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+  %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
+  %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
+  %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
+  %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
+  %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
+
+  ;; Narrower-than-legal
+  %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
+  %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
+  %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+  %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
+  %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
+  %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
+  %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
+  %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
+  %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
+  %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
+  %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+  %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
+  %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
+  %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
+  %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
+  %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
+
+  ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
index 03343e7..cb14d18 100644
--- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
@@ -254,7 +254,7 @@ define void @gcd4(ptr %A, ptr %B, i64 %M, i64 %N) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
 ; CHECK-NEXT:    da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - flow [* *|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4
@@ -322,7 +322,7 @@ define void @gcd5(ptr %A, ptr %B, i64 %M, i64 %N) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
 ; CHECK-NEXT:    da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4
-; CHECK-NEXT:    da analyze - flow [<> *]!
+; CHECK-NEXT:    da analyze - flow [* *|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4
@@ -390,7 +390,7 @@ define void @gcd6(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %conv, ptr %arrayidx5, align 4
 ; CHECK-NEXT:    da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: %2 = load i32, ptr %arrayidx9, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - flow [* *|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %2, ptr %B.addr.12, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %2 = load i32, ptr %arrayidx9, align 4 --> Dst: %2 = load i32, ptr %arrayidx9, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
index cdfaec7..73a415b 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
@@ -384,7 +384,7 @@ define void @symbolicsiv6(ptr %A, ptr %B, i64 %n, i64 %N, i64 %M) nounwind uwtab
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
@@ -440,7 +440,7 @@ define void @symbolicsiv7(ptr %A, ptr %B, i64 %n, i64 %N, i64 %M) nounwind uwtab
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %1 = load i32, ptr %arrayidx6, align 4
-; CHECK-NEXT:    da analyze - flow [<>]!
+; CHECK-NEXT:    da analyze - flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %1, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %1 = load i32, ptr %arrayidx6, align 4 --> Dst: %1 = load i32, ptr %arrayidx6, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll b/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll
index 64fad37..783150a 100644
--- a/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll
@@ -18,7 +18,7 @@ define void @unknown_sign(ptr %a, i64 %k) {
 ; CHECK-NEXT:  Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.0, align 1
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 2, ptr %idx.1, align 1
-; CHECK-NEXT:    da analyze - output [<>]!
+; CHECK-NEXT:    da analyze - output [*|<]!
 ; CHECK-NEXT:  Src: store i8 2, ptr %idx.1, align 1 --> Dst: store i8 2, ptr %idx.1, align 1
 ; CHECK-NEXT:    da analyze - none!
 ;
diff --git a/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll
new file mode 100644
index 0000000..9169ac3
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-ALL
+; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=gcd-miv 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-GCD-MIV
+
+; offset0 = 4;
+; offset1 = 0;
+; for (i = 0; i < 100; i++) {
+;   A[offset0] = 1;
+;   A[offset1] = 2;
+;   offset0 += 3*m;
+;   offset1 += 3;
+; }
+;
+; Dependency exists between the two stores. E.g., consider `m` is
+; 12297829382473034411, which is a modular multiplicative inverse of 3 under
+; modulo 2^64. Then `offset0` is effectively `i + 4`, so accesses will be as
+; follows:
+;
+;   - A[offset0] : A[4], A[5], A[6], ...
+;   - A[offset1] : A[0], A[3], A[6], ...
+;
+define void @gcdmiv_coef_ovfl(ptr %A, i64 %m) {
+; CHECK-ALL-LABEL: 'gcdmiv_coef_ovfl'
+; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT:    da analyze - output [*|<]!
+; CHECK-ALL-NEXT:  Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+;
+; CHECK-GCD-MIV-LABEL: 'gcdmiv_coef_ovfl'
+; CHECK-GCD-MIV-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-GCD-MIV-NEXT:    da analyze - consistent output [*]!
+; CHECK-GCD-MIV-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-GCD-MIV-NEXT:    da analyze - consistent output [*|<]!
+; CHECK-GCD-MIV-NEXT:  Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-GCD-MIV-NEXT:    da analyze - consistent output [*]!
+;
+entry:
+  %step = mul i64 3, %m
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %loop ]
+  %offset.0 = phi i64 [ 4, %entry ] , [ %offset.0.next, %loop ]
+  %offset.1 = phi i64 [ 0, %entry ] , [ %offset.1.next, %loop ]
+  %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset.0
+  %gep.1 = getelementptr inbounds i8, ptr %A, i64 %offset.1
+  store i8 1, ptr %gep.0
+  store i8 2, ptr %gep.1
+  %i.inc = add nuw nsw i64 %i, 1
+  %offset.0.next = add nsw i64 %offset.0, %step
+  %offset.1.next = add nsw i64 %offset.1, 3
+  %ec = icmp eq i64 %i.inc, 100
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll
new file mode 100644
index 0000000..bf0fafc
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-ALL
+; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=strong-siv 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-STRONG-SIV
+
+; for (i = 0; i < (1LL << 62); i++) {
+;   if (0 <= 2*i - 2)
+;     A[2*i - 2] = 1;
+;
+;   if (0 <= 2*i - 4)
+;     A[2*i - 4] = 2;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between the two
+; stores, but it does exist. For example, each store will access A[0] when i
+; is 1 and 2 respectively.
+; The root cause is that the product of the BTC and the coefficient
+; ((1LL << 62) - 1 and 2) overflows in a signed sense.
+define void @strongsiv_const_ovfl(ptr %A) {
+; CHECK-LABEL: 'strongsiv_const_ovfl'
+; CHECK-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:  Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-NEXT:    da analyze - none!
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ]
+  %offset.0 = phi i64 [ -2, %entry ], [ %offset.0.next, %loop.latch ]
+  %offset.1 = phi i64 [ -4, %entry ], [ %offset.1.next, %loop.latch ]
+  %ec = icmp eq i64 %i, 4611686018427387904
+  br i1 %ec, label %exit, label %loop.body
+
+loop.body:
+  %cond.0 = icmp sge i64 %offset.0, 0
+  %cond.1 = icmp sge i64 %offset.1, 0
+  br i1 %cond.0, label %if.then.0, label %loop.middle
+
+if.then.0:
+  %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset.0
+  store i8 1, ptr %gep.0
+  br label %loop.middle
+
+loop.middle:
+  br i1 %cond.1, label %if.then.1, label %loop.latch
+
+if.then.1:
+  %gep.1 = getelementptr inbounds i8, ptr %A, i64 %offset.1
+  store i8 2, ptr %gep.1
+  br label %loop.latch
+
+loop.latch:
+  %i.inc = add nuw nsw i64 %i, 1
+  %offset.0.next = add nsw i64 %offset.0, 2
+  %offset.1.next = add nsw i64 %offset.1, 2
+  br label %loop.header
+
+exit:
+  ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-ALL: {{.*}}
+; CHECK-STRONG-SIV: {{.*}}
diff --git a/llvm/test/Analysis/DependenceAnalysis/symbolic-rdiv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/symbolic-rdiv-overflow.ll
new file mode 100644
index 0000000..c5ff988
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/symbolic-rdiv-overflow.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-ALL
+; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=symbolic-rdiv 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-SYMBOLIC-RDIV
+
+; for (i = 0; i < (1LL << 62); i++) {
+;   if (0 <= 2*i - 2)
+;     A[2*i - 2] = 1;
+;   A[i] = 2;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between the two
+; stores, but it does exist. For example, each store will access A[0] when i
+; is 1 and 0 respectively.
+; The root cause is that the product of the BTC and the coefficient 
+; ((1LL << 62) - 1 and 2) overflows in a signed sense.
+define void @symbolicrdiv_prod_ovfl(ptr %A) {
+; CHECK-ALL-LABEL: 'symbolicrdiv_prod_ovfl'
+; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+;
+; CHECK-SYMBOLIC-RDIV-LABEL: 'symbolicrdiv_prod_ovfl'
+; CHECK-SYMBOLIC-RDIV-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-SYMBOLIC-RDIV-NEXT:    da analyze - none!
+; CHECK-SYMBOLIC-RDIV-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-SYMBOLIC-RDIV-NEXT:    da analyze - none!
+; CHECK-SYMBOLIC-RDIV-NEXT:  Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-SYMBOLIC-RDIV-NEXT:    da analyze - consistent output [*]!
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ]
+  %offset = phi i64 [ -2, %entry ], [ %offset.next, %loop.latch ]
+  %ec = icmp eq i64 %i, 4611686018427387904
+  br i1 %ec, label %exit, label %loop.body
+
+loop.body:
+  %cond = icmp sge i64 %offset, 0
+  br i1 %cond, label %if.then, label %loop.latch
+
+if.then:
+  %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset
+  store i8 1, ptr %gep.0
+  br label %loop.latch
+
+loop.latch:
+  %gep.1 = getelementptr inbounds i8, ptr %A, i64 %i
+  store i8 2, ptr %gep.1
+  %i.inc = add nuw nsw i64 %i, 1
+  %offset.next = add nsw i64 %offset, 2
+  br label %loop.header
+
+exit:
+  ret void
+}
+
+; offset0 = -4611686018427387904;  // -2^62
+; offset1 =  4611686018427387904;  // 2^62
+; for (i = 0; i < (1LL << 62) - 100; i++) {
+;   if (0 <= offset0)
+;     A[offset0] = 1;
+;   if (0 <= offset1)
+;     A[offset1] = 2;
+;   offset0 += 2;
+;   offset1 -= 1;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between the two
+; stores, but it does exist. For example,
+;
+;  memory access           | i == 2^61 | i == 2^61 + 2^59 | i == 2^61 + 2^60  
+; -------------------------|-----------|------------------|-------------------
+;  A[2*i - 2^62] (offset0) |           | A[2^60]          | A[2^61]           
+;  A[-i + 2^62]  (offset1) | A[2^61]   |                  | A[2^60]           
+;
+; The root cause is that the calculation of the differenct between the two
+; constants (-2^62 and 2^62) overflows in a signed sense.
+define void @symbolicrdiv_delta_ovfl(ptr %A) {
+; CHECK-ALL-LABEL: 'symbolicrdiv_delta_ovfl'
+; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+;
+; CHECK-SYMBOLIC-RDIV-LABEL: 'symbolicrdiv_delta_ovfl'
+; CHECK-SYMBOLIC-RDIV-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-SYMBOLIC-RDIV-NEXT:    da analyze - consistent output [*]!
+; CHECK-SYMBOLIC-RDIV-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-SYMBOLIC-RDIV-NEXT:    da analyze - none!
+; CHECK-SYMBOLIC-RDIV-NEXT:  Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-SYMBOLIC-RDIV-NEXT:    da analyze - consistent output [*]!
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ]
+  %offset.0 = phi i64 [ -4611686018427387904, %entry ], [ %offset.0.next, %loop.latch ]
+  %offset.1 = phi i64 [ 4611686018427387904, %entry ], [ %offset.1.next, %loop.latch ]
+  %cond.0 = icmp sge i64 %offset.0, 0
+  %cond.1 = icmp sge i64 %offset.1, 0
+  br i1 %cond.0, label %if.then.0, label %loop.middle
+
+if.then.0:
+  %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset.0
+  store i8 1, ptr %gep.0
+  br label %loop.middle
+
+loop.middle:
+  br i1 %cond.1, label %if.then.1, label %loop.latch
+
+if.then.1:
+  %gep.1 = getelementptr inbounds i8, ptr %A, i64 %offset.1
+  store i8 2, ptr %gep.1
+  br label %loop.latch
+
+loop.latch:
+  %i.inc = add nuw nsw i64 %i, 1
+  %offset.0.next = add nsw i64 %offset.0, 2
+  %offset.1.next = sub nsw i64 %offset.1, 1
+  %ec = icmp eq i64 %i.inc, 4611686018427387804 ; 2^62 - 100
+  br i1 %ec, label %exit, label %loop.header
+
+exit:
+  ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-overflow.ll
new file mode 100644
index 0000000..ba57c7b
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-overflow.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-ALL
+; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=weak-crossing-siv 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-WEAK-CROSSING-SIV
+
+; max_i = INT64_MAX/3  // 3074457345618258602
+; for (long long i = 0; i <= max_i; i++) {
+;   A[-3*i + INT64_MAX] = 0;
+;   if (i)
+;     A[3*i - 2] = 1;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between
+; `A[-3*i + INT64_MAX]` and `A[3*i - 2]`, but it does exist. For example,
+;
+;  memory access       | i == 1           | i == max_i
+; ---------------------|------------------|------------------
+;  A[-3*i + INT64_MAX] | A[INT64_MAX - 3] | A[1]
+;  A[3*i - 2]          | A[1]             | A[INT64_MAX - 3]
+;
+; The root cause is that the calculation of the differenct between the two
+; constants (INT64_MAX and -2) triggers an overflow.
+
+define void @weakcorssing_delta_ovfl(ptr %A) {
+; CHECK-ALL-LABEL: 'weakcorssing_delta_ovfl'
+; CHECK-ALL-NEXT:  Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 0, ptr %idx.0, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 1, ptr %idx.1, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+;
+; CHECK-WEAK-CROSSING-SIV-LABEL: 'weakcorssing_delta_ovfl'
+; CHECK-WEAK-CROSSING-SIV-NEXT:  Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 0, ptr %idx.0, align 1
+; CHECK-WEAK-CROSSING-SIV-NEXT:    da analyze - consistent output [*]!
+; CHECK-WEAK-CROSSING-SIV-NEXT:  Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-WEAK-CROSSING-SIV-NEXT:    da analyze - none!
+; CHECK-WEAK-CROSSING-SIV-NEXT:  Src: store i8 1, ptr %idx.1, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-WEAK-CROSSING-SIV-NEXT:    da analyze - consistent output [*]!
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ]
+  %subscript.0 = phi i64 [ 9223372036854775807, %entry ], [ %subscript.0.next, %loop.latch ]
+  %subscript.1 = phi i64 [ -2, %entry ], [ %subscript.1.next, %loop.latch ]
+  %idx.0 = getelementptr inbounds i8, ptr %A, i64 %subscript.0
+  store i8 0, ptr %idx.0
+  %cond.store = icmp ne i64 %i, 0
+  br i1 %cond.store, label %if.store, label %loop.latch
+
+if.store:
+  %idx.1 = getelementptr inbounds i8, ptr %A, i64 %subscript.1
+  store i8 1, ptr %idx.1
+  br label %loop.latch
+
+loop.latch:
+  %i.inc = add nuw nsw i64 %i, 1
+  %subscript.0.next = add nsw i64 %subscript.0, -3
+  %subscript.1.next = add nsw i64 %subscript.1, 3
+  %ec = icmp sgt i64 %i.inc, 3074457345618258602
+  br i1 %ec, label %exit, label %loop.header
+
+exit:
+  ret void
+}
+
+; max_i = INT64_MAX/3  // 3074457345618258602
+; for (long long i = 0; i <= max_i; i++) {
+;   A[-3*i + INT64_MAX] = 0;
+;   A[3*i + 1] = 1;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between
+; `A[-3*i + INT64_MAX]` and `A[3*i - 2]`, but it does exist. For example,
+;
+;  memory access       | i == 0 | i == 1           | i == max_i - 1 | i == max_i
+; ---------------------|--------|------------------|----------------|------------------
+;  A[-3*i + INT64_MAX] |        | A[INT64_MAX - 3] | A[1]           |
+;  A[3*i + 1]          | A[1]   |                  |                | A[INT64_MAX - 3]
+;
+; The root cause is that the product of the BTC, the coefficient, and 2
+; triggers an overflow.
+;
+define void @weakcorssing_prod_ovfl(ptr %A) {
+; CHECK-ALL-LABEL: 'weakcorssing_prod_ovfl'
+; CHECK-ALL-NEXT:  Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 0, ptr %idx.0, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 1, ptr %idx.1, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+;
+; CHECK-WEAK-CROSSING-SIV-LABEL: 'weakcorssing_prod_ovfl'
+; CHECK-WEAK-CROSSING-SIV-NEXT:  Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 0, ptr %idx.0, align 1
+; CHECK-WEAK-CROSSING-SIV-NEXT:    da analyze - consistent output [*]!
+; CHECK-WEAK-CROSSING-SIV-NEXT:  Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-WEAK-CROSSING-SIV-NEXT:    da analyze - none!
+; CHECK-WEAK-CROSSING-SIV-NEXT:  Src: store i8 1, ptr %idx.1, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-WEAK-CROSSING-SIV-NEXT:    da analyze - consistent output [*]!
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %loop ]
+  %subscript.0 = phi i64 [ 9223372036854775807, %entry ], [ %subscript.0.next, %loop ]
+  %subscript.1 = phi i64 [ 1, %entry ], [ %subscript.1.next, %loop ]
+  %idx.0 = getelementptr inbounds i8, ptr %A, i64 %subscript.0
+  %idx.1 = getelementptr inbounds i8, ptr %A, i64 %subscript.1
+  store i8 0, ptr %idx.0
+  store i8 1, ptr %idx.1
+  %i.inc = add nuw nsw i64 %i, 1
+  %subscript.0.next = add nsw i64 %subscript.0, -3
+  %subscript.1.next = add nsw i64 %subscript.1, 3
+  %ec = icmp sgt i64 %i.inc, 3074457345618258602
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/Analysis/DependenceAnalysis/weak-zero-siv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/weak-zero-siv-overflow.ll
new file mode 100644
index 0000000..6317c38
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/weak-zero-siv-overflow.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-ALL
+; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=weak-zero-siv 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-WEAK-ZERO-SIV
+
+; for (i = 0; i < (1LL << 62); i++) {
+;   if (0 <= 2*i - 2)
+;     A[2*i - 2] = 1;
+;   A[2] = 2;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between the two
+; stores, but it does exist. The root cause is that the product of the BTC and
+; the coefficient ((1LL << 62) - 1 and 2) overflows in a signed sense.
+;
+define void @weakzero_dst_siv_prod_ovfl(ptr %A) {
+; CHECK-ALL-LABEL: 'weakzero_dst_siv_prod_ovfl'
+; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT:    da analyze - consistent output [S]!
+;
+; CHECK-WEAK-ZERO-SIV-LABEL: 'weakzero_dst_siv_prod_ovfl'
+; CHECK-WEAK-ZERO-SIV-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-WEAK-ZERO-SIV-NEXT:    da analyze - consistent output [*]!
+; CHECK-WEAK-ZERO-SIV-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-WEAK-ZERO-SIV-NEXT:    da analyze - none!
+; CHECK-WEAK-ZERO-SIV-NEXT:  Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-WEAK-ZERO-SIV-NEXT:    da analyze - consistent output [S]!
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ]
+  %offset = phi i64 [ -2, %entry ], [ %offset.next, %loop.latch ]
+  %ec = icmp eq i64 %i, 4611686018427387904
+  br i1 %ec, label %exit, label %loop.body
+
+loop.body:
+  %cond = icmp sge i64 %offset, 0
+  br i1 %cond, label %if.then, label %loop.latch
+
+if.then:
+  %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset
+  store i8 1, ptr %gep.0
+  br label %loop.latch
+
+loop.latch:
+  %gep.1 = getelementptr inbounds i8, ptr %A, i64 2
+  store i8 2, ptr %gep.1
+  %i.inc = add nuw nsw i64 %i, 1
+  %offset.next = add nsw i64 %offset, 2
+  br label %loop.header
+
+exit:
+  ret void
+}
+
+; for (i = 0; i < n; i++) {
+;   if (0 <= 2*i - 1)
+;     A[2*i - 1] = 1;
+;   A[INT64_MAX] = 2;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between the two
+; stores, but it does exist. When `%n` is 2^62, the value of `%offset` will be
+; the same as INT64_MAX at the last iteration.
+; The root cause is that the calculation of the difference between the two
+; constants (INT64_MAX and -1) overflows in a signed sense.
+;
+define void @weakzero_dst_siv_delta_ovfl(ptr %A, i64 %n) {
+; CHECK-ALL-LABEL: 'weakzero_dst_siv_delta_ovfl'
+; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT:    da analyze - consistent output [S]!
+;
+; CHECK-WEAK-ZERO-SIV-LABEL: 'weakzero_dst_siv_delta_ovfl'
+; CHECK-WEAK-ZERO-SIV-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-WEAK-ZERO-SIV-NEXT:    da analyze - consistent output [*]!
+; CHECK-WEAK-ZERO-SIV-NEXT:  Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-WEAK-ZERO-SIV-NEXT:    da analyze - none!
+; CHECK-WEAK-ZERO-SIV-NEXT:  Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-WEAK-ZERO-SIV-NEXT:    da analyze - consistent output [S]!
+;
+entry:
+  %guard = icmp sgt i64 %n, 0
+  br i1 %guard, label %loop.header, label %exit
+
+loop.header:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ]
+  %offset = phi i64 [ -2, %entry ], [ %offset.next, %loop.latch ]
+  %ec = icmp eq i64 %i, %n
+  br i1 %ec, label %exit, label %loop.body
+
+loop.body:
+  %cond = icmp sge i64 %offset, 0
+  br i1 %cond, label %if.then, label %loop.latch
+
+if.then:
+  %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset
+  store i8 1, ptr %gep.0
+  br label %loop.latch
+
+loop.latch:
+  %gep.1 = getelementptr inbounds i8, ptr %A, i64 9223372036854775807
+  store i8 2, ptr %gep.1
+  %i.inc = add nuw nsw i64 %i, 1
+  %offset.next = add nsw i64 %offset, 2
+  br label %loop.header
+
+exit:
+  ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
index 7dec2f8..78b4139 100644
--- a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
+++ b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
@@ -1448,4 +1448,85 @@ exit:                                              ; preds = %loop
   ret i16 %crc.next
 }
 
+define i16 @not.crc.data.next.outside.user(i16 %crc.init, i16 %data.init) {
+; CHECK-LABEL: 'not.crc.data.next.outside.user'
+; CHECK-NEXT:  Did not find a hash algorithm
+; CHECK-NEXT:  Reason: Recurrences have stray uses
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+  %data = phi i16 [ %data.init, %entry ], [ %data.next, %loop ]
+  %xor.crc.data = xor i16 %data, %crc
+  %crc.shl = shl i16 %crc, 1
+  %crc.xor = xor i16 %crc.shl, 3
+  %check.sb = icmp slt i16 %xor.crc.data, 0
+  %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+  %data.next = shl i16 %data, 1
+  %iv.next = add nuw nsw i32 %iv, 1
+  %exit.cond = icmp samesign ult i32 %iv, 7
+  br i1 %exit.cond, label %loop, label %exit
+
+exit:
+  %ret = xor i16 %data.next, %crc.next
+  ret i16 %ret
+}
+
+define i16 @not.crc.data.phi.outside.user(i16 %crc.init, i16 %data.init) {
+; CHECK-LABEL: 'not.crc.data.phi.outside.user'
+; CHECK-NEXT:  Did not find a hash algorithm
+; CHECK-NEXT:  Reason: Recurrences have stray uses
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+  %data = phi i16 [ %data.init, %entry ], [ %data.next, %loop ]
+  %xor.crc.data = xor i16 %data, %crc
+  %crc.shl = shl i16 %crc, 1
+  %crc.xor = xor i16 %crc.shl, 3
+  %check.sb = icmp slt i16 %xor.crc.data, 0
+  %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+  %data.next = shl i16 %data, 1
+  %iv.next = add nuw nsw i32 %iv, 1
+  %exit.cond = icmp samesign ult i32 %iv, 7
+  br i1 %exit.cond, label %loop, label %exit
+
+exit:
+  %ret = xor i16 %data, %crc.next
+  ret i16 %ret
+}
+
+define i16 @not.crc.crc.phi.outside.user(i16 %crc.init, i16 %data.init) {
+; CHECK-LABEL: 'not.crc.crc.phi.outside.user'
+; CHECK-NEXT:  Did not find a hash algorithm
+; CHECK-NEXT:  Reason: Recurrences have stray uses
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+  %data = phi i16 [ %data.init, %entry ], [ %data.next, %loop ]
+  %xor.crc.data = xor i16 %data, %crc
+  %crc.shl = shl i16 %crc, 1
+  %crc.xor = xor i16 %crc.shl, 3
+  %check.sb = icmp slt i16 %xor.crc.data, 0
+  %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+  %data.next = shl i16 %data, 1
+  %iv.next = add nuw nsw i32 %iv, 1
+  %exit.cond = icmp samesign ult i32 %iv, 7
+  br i1 %exit.cond, label %loop, label %exit
+
+exit:
+  %ret = xor i16 %crc, %crc.next
+  ret i16 %ret
+}
+
 declare i16 @side.effect()