Diffstat (limited to 'llvm/test'): 106 files changed, 17356 insertions, 10387 deletions
diff --git a/llvm/test/Analysis/CostModel/AArch64/extract-last-active.ll b/llvm/test/Analysis/CostModel/AArch64/extract-last-active.ll new file mode 100644 index 0000000..9efcf91 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/extract-last-active.ll @@ -0,0 +1,216 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=NEON +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=SVE +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sme -force-streaming | FileCheck %s --check-prefix=SME-STREAMING + +define void @extractions() { +; NEON-LABEL: 'extractions' +; NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale 
x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4f64 = call double 
@llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SVE-LABEL: 'extractions' +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison) +; 
SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = call i32 
@llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison) +; 
SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SME-STREAMING-LABEL: 'extractions' +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x 
half> poison, <8 x i1> poison, half poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = call 
bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison) +; SME-STREAMING-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + + ;; Legal types + %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison) + %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison) + %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison) + %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison) + %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison) + %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison) + %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison) + %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison) + %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison) + %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison) + %nxv4i32 = 
call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison) + %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison) + %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison) + %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison) + %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison) + %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison) + + ;; Wider-than-legal + %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison) + %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison) + %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison) + %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison) + %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison) + %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison) + %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison) + %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison) + %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison) + %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison) + %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison) + %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison) + %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison) + %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison) + %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison) + %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison) + + ;; Narrower-than-legal + %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison) + %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison) + %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison) + %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison) + %v4f16 = 
call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison) + %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison) + %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison) + %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison) + %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison) + %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison) + %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison) + %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison) + %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison) + %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison) + %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison) + %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison) + + ret void +} diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll index 362586a..4fc506f 100644 --- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll @@ -87,6 +87,11 @@ declare void @llvm.nvvm.barrier(i32, i32) declare void @llvm.nvvm.barrier.sync(i32) declare void @llvm.nvvm.barrier.sync.cnt(i32, i32) +declare float @llvm.nvvm.ex2.approx.f(float) +declare double @llvm.nvvm.ex2.approx.d(double) +declare <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half>) +declare float @llvm.nvvm.ex2.approx.ftz.f(float) + ; CHECK-LABEL: @simple_upgrade define void @simple_upgrade(i32 %a, i64 %b, i16 %c) { ; CHECK: call i32 @llvm.bitreverse.i32(i32 %a) @@ -355,3 +360,15 @@ define void @cta_barriers(i32 %x, i32 %y) { call void @llvm.nvvm.barrier.sync.cnt(i32 %x, i32 %y) ret void } + +define void @nvvm_ex2_approx(float %a, double %b, half %c, <2 x half> %d) { +; CHECK: call float @llvm.nvvm.ex2.approx.f32(float %a) +; CHECK: call double @llvm.nvvm.ex2.approx.f64(double %b) +; CHECK: call <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half> %d) +; CHECK: call float @llvm.nvvm.ex2.approx.ftz.f32(float %a) + %r1 = call float @llvm.nvvm.ex2.approx.f(float %a) + %r2 = call double @llvm.nvvm.ex2.approx.d(double %b) + %r3 = call <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half> %d) + %r4 = call float @llvm.nvvm.ex2.approx.ftz.f(float %a) + ret void +} diff --git a/llvm/test/CodeGen/AArch64/stackmap.ll b/llvm/test/CodeGen/AArch64/stackmap.ll index 995d254..26221d0 100644 --- a/llvm/test/CodeGen/AArch64/stackmap.ll +++ b/llvm/test/CodeGen/AArch64/stackmap.ll @@ -81,14 +81,14 @@ ; CHECK-NEXT: .hword 8 ; CHECK-NEXT: .hword 0 ; CHECK-NEXT: .hword 0 -; CHECK-NEXT: .word 65535 +; CHECK-NEXT: .word -1 ; SmallConstant ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .hword 8 ; 
CHECK-NEXT: .hword 0 ; CHECK-NEXT: .hword 0 -; CHECK-NEXT: .word 65535 +; CHECK-NEXT: .word -1 ; SmallConstant ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll new file mode 100644 index 0000000..e440bee --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s + +define amdgpu_ps half @fadd_s16_uniform(half inreg %a, half inreg %b) { +; GFX11-FAKE16-LABEL: fadd_s16_uniform: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1 +; GFX11-FAKE16-NEXT: ; return to shader part epilog +; +; GFX11-TRUE16-LABEL: fadd_s16_uniform: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1 +; GFX11-TRUE16-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: fadd_s16_uniform: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_add_f16 s0, s0, s1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: ; return to shader part epilog + %fadd = fadd half %a, %b + ret half %fadd +} + +define amdgpu_ps half @fadd_s16_div(half %a, half %b) { +; GFX11-FAKE16-LABEL: fadd_s16_div: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX11-FAKE16-NEXT: ; return to shader part epilog +; +; GFX11-TRUE16-LABEL: fadd_s16_div: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l +; GFX11-TRUE16-NEXT: ; return to shader part epilog +; +; GFX12-FAKE16-LABEL: fadd_s16_div: +; GFX12-FAKE16: ; %bb.0: +; GFX12-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX12-FAKE16-NEXT: ; return to shader part epilog +; +; GFX12-TRUE16-LABEL: fadd_s16_div: +; GFX12-TRUE16: ; %bb.0: +; GFX12-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l +; GFX12-TRUE16-NEXT: ; return to shader part epilog + %fadd = fadd half %a, %b + ret half %fadd +} + +define amdgpu_ps float @fadd_s32_uniform(float inreg %a, float inreg %b) { +; GFX11-LABEL: fadd_s32_uniform: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_add_f32_e64 v0, s0, s1 +; GFX11-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: fadd_s32_uniform: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_add_f32 s0, s0, s1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: ; return to shader part epilog + %fadd = fadd float %a, %b + ret float %fadd +} + +define amdgpu_ps float @fadd_s32_div(float %a, float %b) { +; GCN-LABEL: fadd_s32_div: +; GCN: ; %bb.0: +; GCN-NEXT: v_add_f32_e32 v0, v0, v1 +; GCN-NEXT: ; return to shader part epilog + %fadd = fadd float %a, %b + ret float %fadd +} + +define amdgpu_ps void @fadd_s64_uniform(double inreg %a, double inreg %b, ptr addrspace(1) %ptr) { +; GFX11-LABEL: fadd_s64_uniform: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_add_f64 v[2:3], s[0:1], s[2:3] 
+; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fadd_s64_uniform: +; GFX12: ; %bb.0: +; GFX12-NEXT: v_add_f64_e64 v[2:3], s[0:1], s[2:3] +; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX12-NEXT: s_endpgm + %fadd = fadd double %a, %b + store double %fadd, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_ps void @fadd_s64_div(double %a, double %b, ptr addrspace(1) %ptr) { +; GFX11-LABEL: fadd_s64_div: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] +; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fadd_s64_div: +; GFX12: ; %bb.0: +; GFX12-NEXT: v_add_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off +; GFX12-NEXT: s_endpgm + %fadd = fadd double %a, %b + store double %fadd, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_ps <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) { +; GFX11-LABEL: fadd_v2s16_uniform: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_pk_add_f16 v0, s0, s1 +; GFX11-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: fadd_v2s16_uniform: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_lshr_b32 s2, s0, 16 +; GFX12-NEXT: s_lshr_b32 s3, s1, 16 +; GFX12-NEXT: s_add_f16 s0, s0, s1 +; GFX12-NEXT: s_add_f16 s1, s2, s3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: ; return to shader part epilog + %fadd = fadd <2 x half> %a, %b + ret <2 x half> %fadd +} + +define amdgpu_ps <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) { +; GCN-LABEL: fadd_v2s16_div: +; GCN: ; %bb.0: +; GCN-NEXT: v_pk_add_f16 v0, v0, v1 +; GCN-NEXT: ; return to shader part epilog + %fadd = fadd <2 x half> %a, %b + ret <2 x half> %fadd +} + +define amdgpu_ps <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) { +; GFX11-LABEL: fadd_v2s32_uniform: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_add_f32_e64 v0, s0, s2 +; GFX11-NEXT: v_add_f32_e64 v1, s1, s3 +; GFX11-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: fadd_v2s32_uniform: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_add_f32 s0, s0, s2 +; GFX12-NEXT: s_add_f32 s1, s1, s3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-NEXT: ; return to shader part epilog + %fadd = fadd <2 x float> %a, %b + ret <2 x float> %fadd +} + +define amdgpu_ps <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) { +; GCN-LABEL: fadd_v2s32_div: +; GCN: ; %bb.0: +; GCN-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3 +; GCN-NEXT: ; return to shader part epilog + %fadd = fadd <2 x float> %a, %b + ret <2 x float> %fadd +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll index 1a7ccf0..588802c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX7 %s -; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s -; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn 
-mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr addrspace(1) %out) { ; GFX7-LABEL: fcmp_uniform_select: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir index 67cc016..b6652f6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX7 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GF8 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select %s -o - | FileCheck -check-prefixes=GFX7 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select %s -o - | FileCheck -check-prefixes=GF8 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select %s -o - | FileCheck -check-prefixes=GFX11 %s --- name: test_copy_scc_vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll index 02d0e52..6facdfd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll @@ -104,109 +104,110 @@ define amdgpu_cs <4 x i32> @abs_sgpr_v4i32(<4 x i32> inreg %arg) { ret <4 x i32> %res } -define amdgpu_cs i16 @abs_vgpr_i16(i16 %arg) { +define i16 @abs_vgpr_i16(i16 %arg) { ; GFX6-LABEL: abs_vgpr_i16: ; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 0, v0 ; GFX6-NEXT: v_max_i32_e32 v0, v0, v1 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: ; return to shader part epilog +; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: abs_vgpr_i16: ; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_sub_u16_e32 v1, 0, v0 ; GFX8-NEXT: v_max_i16_e32 v0, v0, v1 -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: ; return to shader part epilog +; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: abs_vgpr_i16: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_sub_nc_u16 v1, 0, v0 ; GFX10-NEXT: v_max_i16 v0, v0, v1 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 -; GFX10-NEXT: ; return to shader part epilog +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: abs_vgpr_i16: ; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_sub_nc_u16 v1, 0, v0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-NEXT: v_max_i16 v0, v0, v1 -; GFX1250-NEXT: v_readfirstlane_b32 s0, v0 -; GFX1250-NEXT: ; return to shader 
part epilog +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %res = call i16 @llvm.abs.i16(i16 %arg, i1 false) ret i16 %res } -define amdgpu_cs i32 @abs_vgpr_i32(i32 %arg) { +define i32 @abs_vgpr_i32(i32 %arg) { ; GFX6-LABEL: abs_vgpr_i32: ; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 0, v0 ; GFX6-NEXT: v_max_i32_e32 v0, v0, v1 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: ; return to shader part epilog +; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: abs_vgpr_i32: ; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 0, v0 ; GFX8-NEXT: v_max_i32_e32 v0, v0, v1 -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: ; return to shader part epilog +; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: abs_vgpr_i32: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 ; GFX10-NEXT: v_max_i32_e32 v0, v0, v1 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 -; GFX10-NEXT: ; return to shader part epilog +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: abs_vgpr_i32: ; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_sub_nc_u32_e32 v1, 0, v0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-NEXT: v_max_i32_e32 v0, v0, v1 -; GFX1250-NEXT: v_readfirstlane_b32 s0, v0 -; GFX1250-NEXT: ; return to shader part epilog +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %res = call i32 @llvm.abs.i32(i32 %arg, i1 false) ret i32 %res } -define amdgpu_cs i64 @abs_vgpr_i64(i64 %arg) { +define i64 @abs_vgpr_i64(i64 %arg) { ; GFX6-LABEL: abs_vgpr_i64: ; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v1 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc ; GFX6-NEXT: v_xor_b32_e32 v0, v0, v2 ; GFX6-NEXT: v_xor_b32_e32 v1, v1, v2 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v1 -; GFX6-NEXT: ; return to shader part epilog +; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: abs_vgpr_i64: ; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v1 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc ; GFX8-NEXT: v_xor_b32_e32 v0, v0, v2 ; GFX8-NEXT: v_xor_b32_e32 v1, v1, v2 -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 -; GFX8-NEXT: ; return to shader part epilog +; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: abs_vgpr_i64: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_ashrrev_i32_e32 v2, 31, v1 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo ; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2 ; GFX10-NEXT: v_xor_b32_e32 v1, v1, v2 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 -; GFX10-NEXT: v_readfirstlane_b32 s1, v1 -; GFX10-NEXT: ; return to shader part epilog +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: abs_vgpr_i64: ; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_ashrrev_i32_e32 v2, 31, v1 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1250-NEXT: v_mov_b32_e32 v3, v2 @@ -214,17 +215,15 @@ define amdgpu_cs i64 @abs_vgpr_i64(i64 %arg) { ; 
GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_xor_b32_e32 v0, v0, v2 ; GFX1250-NEXT: v_xor_b32_e32 v1, v1, v2 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_readfirstlane_b32 s0, v0 -; GFX1250-NEXT: v_readfirstlane_b32 s1, v1 -; GFX1250-NEXT: ; return to shader part epilog +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %res = call i64 @llvm.abs.i64(i64 %arg, i1 false) ret i64 %res } -define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) { +define <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) { ; GFX6-LABEL: abs_vgpr_v4i32: ; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0, v0 ; GFX6-NEXT: v_max_i32_e32 v0, v0, v4 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0, v1 @@ -233,14 +232,11 @@ define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) { ; GFX6-NEXT: v_max_i32_e32 v2, v2, v4 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0, v3 ; GFX6-NEXT: v_max_i32_e32 v3, v3, v4 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v1 -; GFX6-NEXT: v_readfirstlane_b32 s2, v2 -; GFX6-NEXT: v_readfirstlane_b32 s3, v3 -; GFX6-NEXT: ; return to shader part epilog +; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: abs_vgpr_v4i32: ; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0, v0 ; GFX8-NEXT: v_max_i32_e32 v0, v0, v4 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0, v1 @@ -249,14 +245,11 @@ define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) { ; GFX8-NEXT: v_max_i32_e32 v2, v2, v4 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0, v3 ; GFX8-NEXT: v_max_i32_e32 v3, v3, v4 -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 -; GFX8-NEXT: v_readfirstlane_b32 s2, v2 -; GFX8-NEXT: v_readfirstlane_b32 s3, v3 -; GFX8-NEXT: ; return to shader part epilog +; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: abs_vgpr_v4i32: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_sub_nc_u32_e32 v4, 0, v0 ; GFX10-NEXT: v_sub_nc_u32_e32 v5, 0, v1 ; GFX10-NEXT: v_sub_nc_u32_e32 v6, 0, v2 @@ -265,14 +258,12 @@ define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) { ; GFX10-NEXT: v_max_i32_e32 v1, v1, v5 ; GFX10-NEXT: v_max_i32_e32 v2, v2, v6 ; GFX10-NEXT: v_max_i32_e32 v3, v3, v7 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 -; GFX10-NEXT: v_readfirstlane_b32 s1, v1 -; GFX10-NEXT: v_readfirstlane_b32 s2, v2 -; GFX10-NEXT: v_readfirstlane_b32 s3, v3 -; GFX10-NEXT: ; return to shader part epilog +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: abs_vgpr_v4i32: ; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_dual_sub_nc_u32 v4, 0, v0 :: v_dual_sub_nc_u32 v5, 0, v1 ; GFX1250-NEXT: v_dual_sub_nc_u32 v6, 0, v2 :: v_dual_sub_nc_u32 v7, 0, v3 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -281,13 +272,7 @@ define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) { ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1250-NEXT: v_max_i32_e32 v2, v2, v6 ; GFX1250-NEXT: v_max_i32_e32 v3, v3, v7 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1250-NEXT: v_readfirstlane_b32 s0, v0 -; GFX1250-NEXT: v_readfirstlane_b32 s1, v1 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1250-NEXT: v_readfirstlane_b32 s2, v2 
-; GFX1250-NEXT: v_readfirstlane_b32 s3, v3 -; GFX1250-NEXT: ; return to shader part epilog +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false) ret <4 x i32> %res } @@ -304,44 +289,43 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) { ret <2 x i8> %res } -define amdgpu_cs <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) { +define <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) { ; GFX6-LABEL: abs_vgpr_v2i8: ; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v0 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX6-NEXT: v_max_i32_e32 v0, v0, v2 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v1 ; GFX6-NEXT: v_max_i32_e32 v1, v1, v2 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v1 -; GFX6-NEXT: ; return to shader part epilog +; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: abs_vgpr_v2i8: ; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_sub_u16_sdwa v3, v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: v_sub_u16_sdwa v2, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: v_max_i16_sdwa v0, sext(v0), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_max_i16_sdwa v1, sext(v1), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 -; GFX8-NEXT: ; return to shader part epilog +; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: abs_vgpr_v2i8: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX10-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-NEXT: v_sub_nc_u16 v2, 0, v0 ; GFX10-NEXT: v_sub_nc_u16 v3, 0, v1 ; GFX10-NEXT: v_max_i16 v0, v0, v2 ; GFX10-NEXT: v_max_i16 v1, v1, v3 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 -; GFX10-NEXT: v_readfirstlane_b32 s1, v1 -; GFX10-NEXT: ; return to shader part epilog +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: abs_vgpr_v2i8: ; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX1250-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -350,10 +334,7 @@ define amdgpu_cs <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) { ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_max_i16 v0, v0, v2 ; GFX1250-NEXT: v_max_i16 v1, v1, v3 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_readfirstlane_b32 s0, v0 -; GFX1250-NEXT: v_readfirstlane_b32 s1, v1 -; GFX1250-NEXT: ; return to shader part epilog +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false) ret <2 x i8> %res } @@ -372,9 +353,10 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) { ret <3 x i8> %res } -define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) { +define <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) { ; GFX6-LABEL: abs_vgpr_v3i8: ; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8 @@ -384,13 +366,11 @@ define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) { ; 
GFX6-NEXT: v_max_i32_e32 v1, v1, v3 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v2 ; GFX6-NEXT: v_max_i32_e32 v2, v2, v3 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v1 -; GFX6-NEXT: v_readfirstlane_b32 s2, v2 -; GFX6-NEXT: ; return to shader part epilog +; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: abs_vgpr_v3i8: ; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v3, 0 ; GFX8-NEXT: v_sub_u16_sdwa v4, v3, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: v_max_i16_sdwa v0, sext(v0), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -398,13 +378,11 @@ define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) { ; GFX8-NEXT: v_sub_u16_sdwa v3, v3, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: v_max_i16_sdwa v1, sext(v1), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_max_i16_sdwa v2, sext(v2), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 -; GFX8-NEXT: v_readfirstlane_b32 s2, v2 -; GFX8-NEXT: ; return to shader part epilog +; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: abs_vgpr_v3i8: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX10-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-NEXT: v_bfe_i32 v2, v2, 0, 8 @@ -414,13 +392,12 @@ define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) { ; GFX10-NEXT: v_max_i16 v0, v0, v3 ; GFX10-NEXT: v_max_i16 v1, v1, v4 ; GFX10-NEXT: v_max_i16 v2, v2, v5 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 -; GFX10-NEXT: v_readfirstlane_b32 s1, v1 -; GFX10-NEXT: v_readfirstlane_b32 s2, v2 -; GFX10-NEXT: ; return to shader part epilog +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: abs_vgpr_v3i8: ; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX1250-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX1250-NEXT: v_bfe_i32 v2, v2, 0, 8 @@ -433,12 +410,7 @@ define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) { ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250-NEXT: v_max_i16 v1, v1, v4 ; GFX1250-NEXT: v_max_i16 v2, v2, v5 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_readfirstlane_b32 s0, v0 -; GFX1250-NEXT: v_readfirstlane_b32 s1, v1 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX1250-NEXT: v_readfirstlane_b32 s2, v2 -; GFX1250-NEXT: ; return to shader part epilog +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false) ret <3 x i8> %res } @@ -485,44 +457,44 @@ define amdgpu_cs <2 x i16> @abs_sgpr_v2i16(<2 x i16> inreg %arg) { ret <2 x i16> %res } -define amdgpu_cs <2 x i16> @abs_vgpr_v2i16(<2 x i16> %arg) { +define <2 x i16> @abs_vgpr_v2i16(<2 x i16> %arg) { ; GFX6-LABEL: abs_vgpr_v2i16: ; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v0 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 ; GFX6-NEXT: v_max_i32_e32 v0, v0, v2 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v1 ; GFX6-NEXT: v_max_i32_e32 v1, v1, v2 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v1 -; GFX6-NEXT: ; return to shader part epilog +; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX8-LABEL: abs_vgpr_v2i16: ; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_sub_u16_e32 v1, 0, v0 ; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_max_i16_e32 v1, v0, v1 ; GFX8-NEXT: v_max_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: ; return to shader part epilog +; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: abs_vgpr_v2i16: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_pk_sub_i16 v1, 0, v0 ; GFX10-NEXT: v_pk_max_i16 v0, v0, v1 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 -; GFX10-NEXT: ; return to shader part epilog +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: abs_vgpr_v2i16: ; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_pk_sub_i16 v1, 0, v0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-NEXT: v_pk_max_i16 v0, v0, v1 -; GFX1250-NEXT: v_readfirstlane_b32 s0, v0 -; GFX1250-NEXT: ; return to shader part epilog +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false) ret <2 x i16> %res } @@ -576,9 +548,10 @@ define amdgpu_cs <3 x i16> @abs_sgpr_v3i16(<3 x i16> inreg %arg) { ret <3 x i16> %res } -define amdgpu_cs <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) { +define <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) { ; GFX6-LABEL: abs_vgpr_v3i16: ; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 @@ -588,13 +561,11 @@ define amdgpu_cs <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) { ; GFX6-NEXT: v_max_i32_e32 v1, v1, v3 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v2 ; GFX6-NEXT: v_max_i32_e32 v2, v2, v3 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v1 -; GFX6-NEXT: v_readfirstlane_b32 s2, v2 -; GFX6-NEXT: ; return to shader part epilog +; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: abs_vgpr_v3i16: ; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v3, 0 ; GFX8-NEXT: v_sub_u16_e32 v2, 0, v0 ; GFX8-NEXT: v_sub_u16_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 @@ -603,31 +574,27 @@ define amdgpu_cs <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) { ; GFX8-NEXT: v_max_i16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-NEXT: v_max_i16_e32 v1, v1, v4 -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 -; GFX8-NEXT: ; return to shader part epilog +; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: abs_vgpr_v3i16: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_pk_sub_i16 v2, 0, v0 ; GFX10-NEXT: v_sub_nc_u16 v3, 0, v1 ; GFX10-NEXT: v_pk_max_i16 v0, v0, v2 ; GFX10-NEXT: v_max_i16 v1, v1, v3 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 -; GFX10-NEXT: v_readfirstlane_b32 s1, v1 -; GFX10-NEXT: ; return to shader part epilog +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: abs_vgpr_v3i16: ; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; 
GFX1250-NEXT: v_pk_sub_i16 v2, 0, v0 ; GFX1250-NEXT: v_sub_nc_u16 v3, 0, v1 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_pk_max_i16 v0, v0, v2 ; GFX1250-NEXT: v_max_i16 v1, v1, v3 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_readfirstlane_b32 s0, v0 -; GFX1250-NEXT: v_readfirstlane_b32 s1, v1 -; GFX1250-NEXT: ; return to shader part epilog +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false) ret <3 x i16> %res } diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-to-vector.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-to-vector.ll new file mode 100644 index 0000000..05a0e39 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-to-vector.ll @@ -0,0 +1,325 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx1100 -passes=amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s + +define amdgpu_kernel void @large_array_vectors_small_users(<16 x i8> %in, <16 x i8> %add, ptr addrspace(3) %out) #0 { +; OPT-LABEL: define amdgpu_kernel void @large_array_vectors_small_users( +; OPT-SAME: <16 x i8> [[IN:%.*]], <16 x i8> [[ADD:%.*]], ptr addrspace(3) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { +; OPT-NEXT: [[ENTRY:.*:]] +; OPT-NEXT: [[ALLOCA:%.*]] = freeze <128 x i8> poison +; OPT-NEXT: [[TMP0:%.*]] = extractelement <16 x i8> [[IN]], i64 0 +; OPT-NEXT: [[TMP1:%.*]] = insertelement <128 x i8> [[ALLOCA]], i8 [[TMP0]], i32 0 +; OPT-NEXT: [[TMP2:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT: [[TMP3:%.*]] = insertelement <128 x i8> [[TMP1]], i8 [[TMP2]], i32 1 +; OPT-NEXT: [[TMP4:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT: [[TMP5:%.*]] = insertelement <128 x i8> [[TMP3]], i8 [[TMP4]], i32 2 +; OPT-NEXT: [[TMP6:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT: [[TMP7:%.*]] = insertelement <128 x i8> [[TMP5]], i8 [[TMP6]], i32 3 +; OPT-NEXT: [[TMP8:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT: [[TMP9:%.*]] = insertelement <128 x i8> [[TMP7]], i8 [[TMP8]], i32 4 +; OPT-NEXT: [[TMP10:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT: [[TMP11:%.*]] = insertelement <128 x i8> [[TMP9]], i8 [[TMP10]], i32 5 +; OPT-NEXT: [[TMP12:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT: [[TMP13:%.*]] = insertelement <128 x i8> [[TMP11]], i8 [[TMP12]], i32 6 +; OPT-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT: [[TMP15:%.*]] = insertelement <128 x i8> [[TMP13]], i8 [[TMP14]], i32 7 +; OPT-NEXT: [[TMP16:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT: [[TMP17:%.*]] = insertelement <128 x i8> [[TMP15]], i8 [[TMP16]], i32 8 +; OPT-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT: [[TMP19:%.*]] = insertelement <128 x i8> [[TMP17]], i8 [[TMP18]], i32 9 +; OPT-NEXT: [[TMP20:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT: [[TMP21:%.*]] = insertelement <128 x i8> [[TMP19]], i8 [[TMP20]], i32 10 +; OPT-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT: [[TMP23:%.*]] = insertelement <128 x i8> [[TMP21]], i8 [[TMP22]], i32 11 +; OPT-NEXT: [[TMP24:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT: [[TMP25:%.*]] = insertelement <128 x i8> [[TMP23]], i8 [[TMP24]], i32 12 +; OPT-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT: [[TMP27:%.*]] = insertelement <128 x 
i8> [[TMP25]], i8 [[TMP26]], i32 13 +; OPT-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT: [[TMP29:%.*]] = insertelement <128 x i8> [[TMP27]], i8 [[TMP28]], i32 14 +; OPT-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT: [[TMP31:%.*]] = insertelement <128 x i8> [[TMP29]], i8 [[TMP30]], i32 15 +; OPT-NEXT: [[TMP32:%.*]] = extractelement <16 x i8> [[IN]], i64 0 +; OPT-NEXT: [[TMP33:%.*]] = insertelement <128 x i8> [[TMP31]], i8 [[TMP32]], i32 0 +; OPT-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT: [[TMP35:%.*]] = insertelement <128 x i8> [[TMP33]], i8 [[TMP34]], i32 1 +; OPT-NEXT: [[TMP36:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT: [[TMP37:%.*]] = insertelement <128 x i8> [[TMP35]], i8 [[TMP36]], i32 2 +; OPT-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT: [[TMP39:%.*]] = insertelement <128 x i8> [[TMP37]], i8 [[TMP38]], i32 3 +; OPT-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT: [[TMP41:%.*]] = insertelement <128 x i8> [[TMP39]], i8 [[TMP40]], i32 4 +; OPT-NEXT: [[TMP42:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT: [[TMP43:%.*]] = insertelement <128 x i8> [[TMP41]], i8 [[TMP42]], i32 5 +; OPT-NEXT: [[TMP44:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT: [[TMP45:%.*]] = insertelement <128 x i8> [[TMP43]], i8 [[TMP44]], i32 6 +; OPT-NEXT: [[TMP46:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT: [[TMP47:%.*]] = insertelement <128 x i8> [[TMP45]], i8 [[TMP46]], i32 7 +; OPT-NEXT: [[TMP48:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT: [[TMP49:%.*]] = insertelement <128 x i8> [[TMP47]], i8 [[TMP48]], i32 8 +; OPT-NEXT: [[TMP50:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT: [[TMP51:%.*]] = insertelement <128 x i8> [[TMP49]], i8 [[TMP50]], i32 9 +; OPT-NEXT: [[TMP52:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT: [[TMP53:%.*]] = insertelement <128 x i8> [[TMP51]], i8 [[TMP52]], i32 10 +; OPT-NEXT: [[TMP54:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT: [[TMP55:%.*]] = insertelement <128 x i8> [[TMP53]], i8 [[TMP54]], i32 11 +; OPT-NEXT: [[TMP56:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT: [[TMP57:%.*]] = insertelement <128 x i8> [[TMP55]], i8 [[TMP56]], i32 12 +; OPT-NEXT: [[TMP58:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT: [[TMP59:%.*]] = insertelement <128 x i8> [[TMP57]], i8 [[TMP58]], i32 13 +; OPT-NEXT: [[TMP60:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT: [[TMP61:%.*]] = insertelement <128 x i8> [[TMP59]], i8 [[TMP60]], i32 14 +; OPT-NEXT: [[TMP62:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT: [[TMP63:%.*]] = insertelement <128 x i8> [[TMP61]], i8 [[TMP62]], i32 15 +; OPT-NEXT: [[TMP64:%.*]] = extractelement <16 x i8> [[IN]], i64 0 +; OPT-NEXT: [[TMP65:%.*]] = insertelement <128 x i8> [[TMP63]], i8 [[TMP64]], i32 0 +; OPT-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT: [[TMP67:%.*]] = insertelement <128 x i8> [[TMP65]], i8 [[TMP66]], i32 1 +; OPT-NEXT: [[TMP68:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT: [[TMP69:%.*]] = insertelement <128 x i8> [[TMP67]], i8 [[TMP68]], i32 2 +; OPT-NEXT: [[TMP70:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT: [[TMP71:%.*]] = insertelement <128 x i8> [[TMP69]], i8 [[TMP70]], i32 3 +; OPT-NEXT: [[TMP72:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT: [[TMP73:%.*]] = insertelement <128 x i8> [[TMP71]], i8 
[[TMP72]], i32 4 +; OPT-NEXT: [[TMP74:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT: [[TMP75:%.*]] = insertelement <128 x i8> [[TMP73]], i8 [[TMP74]], i32 5 +; OPT-NEXT: [[TMP76:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT: [[TMP77:%.*]] = insertelement <128 x i8> [[TMP75]], i8 [[TMP76]], i32 6 +; OPT-NEXT: [[TMP78:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT: [[TMP79:%.*]] = insertelement <128 x i8> [[TMP77]], i8 [[TMP78]], i32 7 +; OPT-NEXT: [[TMP80:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT: [[TMP81:%.*]] = insertelement <128 x i8> [[TMP79]], i8 [[TMP80]], i32 8 +; OPT-NEXT: [[TMP82:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT: [[TMP83:%.*]] = insertelement <128 x i8> [[TMP81]], i8 [[TMP82]], i32 9 +; OPT-NEXT: [[TMP84:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT: [[TMP85:%.*]] = insertelement <128 x i8> [[TMP83]], i8 [[TMP84]], i32 10 +; OPT-NEXT: [[TMP86:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT: [[TMP87:%.*]] = insertelement <128 x i8> [[TMP85]], i8 [[TMP86]], i32 11 +; OPT-NEXT: [[TMP88:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT: [[TMP89:%.*]] = insertelement <128 x i8> [[TMP87]], i8 [[TMP88]], i32 12 +; OPT-NEXT: [[TMP90:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT: [[TMP91:%.*]] = insertelement <128 x i8> [[TMP89]], i8 [[TMP90]], i32 13 +; OPT-NEXT: [[TMP92:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT: [[TMP93:%.*]] = insertelement <128 x i8> [[TMP91]], i8 [[TMP92]], i32 14 +; OPT-NEXT: [[TMP94:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT: [[TMP95:%.*]] = insertelement <128 x i8> [[TMP93]], i8 [[TMP94]], i32 15 +; OPT-NEXT: [[TMP96:%.*]] = extractelement <16 x i8> [[IN]], i64 0 +; OPT-NEXT: [[TMP97:%.*]] = insertelement <128 x i8> [[TMP95]], i8 [[TMP96]], i32 0 +; OPT-NEXT: [[TMP98:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT: [[TMP99:%.*]] = insertelement <128 x i8> [[TMP97]], i8 [[TMP98]], i32 1 +; OPT-NEXT: [[TMP100:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT: [[TMP101:%.*]] = insertelement <128 x i8> [[TMP99]], i8 [[TMP100]], i32 2 +; OPT-NEXT: [[TMP102:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT: [[TMP103:%.*]] = insertelement <128 x i8> [[TMP101]], i8 [[TMP102]], i32 3 +; OPT-NEXT: [[TMP104:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT: [[TMP105:%.*]] = insertelement <128 x i8> [[TMP103]], i8 [[TMP104]], i32 4 +; OPT-NEXT: [[TMP106:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT: [[TMP107:%.*]] = insertelement <128 x i8> [[TMP105]], i8 [[TMP106]], i32 5 +; OPT-NEXT: [[TMP108:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT: [[TMP109:%.*]] = insertelement <128 x i8> [[TMP107]], i8 [[TMP108]], i32 6 +; OPT-NEXT: [[TMP110:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT: [[TMP111:%.*]] = insertelement <128 x i8> [[TMP109]], i8 [[TMP110]], i32 7 +; OPT-NEXT: [[TMP112:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT: [[TMP113:%.*]] = insertelement <128 x i8> [[TMP111]], i8 [[TMP112]], i32 8 +; OPT-NEXT: [[TMP114:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT: [[TMP115:%.*]] = insertelement <128 x i8> [[TMP113]], i8 [[TMP114]], i32 9 +; OPT-NEXT: [[TMP116:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT: [[TMP117:%.*]] = insertelement <128 x i8> [[TMP115]], i8 [[TMP116]], i32 10 +; OPT-NEXT: [[TMP118:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT: [[TMP119:%.*]] = insertelement <128 x 
i8> [[TMP117]], i8 [[TMP118]], i32 11 +; OPT-NEXT: [[TMP120:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT: [[TMP121:%.*]] = insertelement <128 x i8> [[TMP119]], i8 [[TMP120]], i32 12 +; OPT-NEXT: [[TMP122:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT: [[TMP123:%.*]] = insertelement <128 x i8> [[TMP121]], i8 [[TMP122]], i32 13 +; OPT-NEXT: [[TMP124:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT: [[TMP125:%.*]] = insertelement <128 x i8> [[TMP123]], i8 [[TMP124]], i32 14 +; OPT-NEXT: [[TMP126:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT: [[TMP127:%.*]] = insertelement <128 x i8> [[TMP125]], i8 [[TMP126]], i32 15 +; OPT-NEXT: [[TMP128:%.*]] = extractelement <16 x i8> [[IN]], i64 0 +; OPT-NEXT: [[TMP129:%.*]] = insertelement <128 x i8> [[TMP127]], i8 [[TMP128]], i32 0 +; OPT-NEXT: [[TMP130:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT: [[TMP131:%.*]] = insertelement <128 x i8> [[TMP129]], i8 [[TMP130]], i32 1 +; OPT-NEXT: [[TMP132:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT: [[TMP133:%.*]] = insertelement <128 x i8> [[TMP131]], i8 [[TMP132]], i32 2 +; OPT-NEXT: [[TMP134:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT: [[TMP135:%.*]] = insertelement <128 x i8> [[TMP133]], i8 [[TMP134]], i32 3 +; OPT-NEXT: [[TMP136:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT: [[TMP137:%.*]] = insertelement <128 x i8> [[TMP135]], i8 [[TMP136]], i32 4 +; OPT-NEXT: [[TMP138:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT: [[TMP139:%.*]] = insertelement <128 x i8> [[TMP137]], i8 [[TMP138]], i32 5 +; OPT-NEXT: [[TMP140:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT: [[TMP141:%.*]] = insertelement <128 x i8> [[TMP139]], i8 [[TMP140]], i32 6 +; OPT-NEXT: [[TMP142:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT: [[TMP143:%.*]] = insertelement <128 x i8> [[TMP141]], i8 [[TMP142]], i32 7 +; OPT-NEXT: [[TMP144:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT: [[TMP145:%.*]] = insertelement <128 x i8> [[TMP143]], i8 [[TMP144]], i32 8 +; OPT-NEXT: [[TMP146:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT: [[TMP147:%.*]] = insertelement <128 x i8> [[TMP145]], i8 [[TMP146]], i32 9 +; OPT-NEXT: [[TMP148:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT: [[TMP149:%.*]] = insertelement <128 x i8> [[TMP147]], i8 [[TMP148]], i32 10 +; OPT-NEXT: [[TMP150:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT: [[TMP151:%.*]] = insertelement <128 x i8> [[TMP149]], i8 [[TMP150]], i32 11 +; OPT-NEXT: [[TMP152:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT: [[TMP153:%.*]] = insertelement <128 x i8> [[TMP151]], i8 [[TMP152]], i32 12 +; OPT-NEXT: [[TMP154:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT: [[TMP155:%.*]] = insertelement <128 x i8> [[TMP153]], i8 [[TMP154]], i32 13 +; OPT-NEXT: [[TMP156:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT: [[TMP157:%.*]] = insertelement <128 x i8> [[TMP155]], i8 [[TMP156]], i32 14 +; OPT-NEXT: [[TMP158:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT: [[TMP159:%.*]] = insertelement <128 x i8> [[TMP157]], i8 [[TMP158]], i32 15 +; OPT-NEXT: [[TMP160:%.*]] = extractelement <16 x i8> [[IN]], i64 0 +; OPT-NEXT: [[TMP161:%.*]] = insertelement <128 x i8> [[TMP159]], i8 [[TMP160]], i32 0 +; OPT-NEXT: [[TMP162:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT: [[TMP163:%.*]] = insertelement <128 x i8> [[TMP161]], i8 [[TMP162]], i32 1 +; OPT-NEXT: [[TMP164:%.*]] = 
extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT: [[TMP165:%.*]] = insertelement <128 x i8> [[TMP163]], i8 [[TMP164]], i32 2 +; OPT-NEXT: [[TMP166:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT: [[TMP167:%.*]] = insertelement <128 x i8> [[TMP165]], i8 [[TMP166]], i32 3 +; OPT-NEXT: [[TMP168:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT: [[TMP169:%.*]] = insertelement <128 x i8> [[TMP167]], i8 [[TMP168]], i32 4 +; OPT-NEXT: [[TMP170:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT: [[TMP171:%.*]] = insertelement <128 x i8> [[TMP169]], i8 [[TMP170]], i32 5 +; OPT-NEXT: [[TMP172:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT: [[TMP173:%.*]] = insertelement <128 x i8> [[TMP171]], i8 [[TMP172]], i32 6 +; OPT-NEXT: [[TMP174:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT: [[TMP175:%.*]] = insertelement <128 x i8> [[TMP173]], i8 [[TMP174]], i32 7 +; OPT-NEXT: [[TMP176:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT: [[TMP177:%.*]] = insertelement <128 x i8> [[TMP175]], i8 [[TMP176]], i32 8 +; OPT-NEXT: [[TMP178:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT: [[TMP179:%.*]] = insertelement <128 x i8> [[TMP177]], i8 [[TMP178]], i32 9 +; OPT-NEXT: [[TMP180:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT: [[TMP181:%.*]] = insertelement <128 x i8> [[TMP179]], i8 [[TMP180]], i32 10 +; OPT-NEXT: [[TMP182:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT: [[TMP183:%.*]] = insertelement <128 x i8> [[TMP181]], i8 [[TMP182]], i32 11 +; OPT-NEXT: [[TMP184:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT: [[TMP185:%.*]] = insertelement <128 x i8> [[TMP183]], i8 [[TMP184]], i32 12 +; OPT-NEXT: [[TMP186:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT: [[TMP187:%.*]] = insertelement <128 x i8> [[TMP185]], i8 [[TMP186]], i32 13 +; OPT-NEXT: [[TMP188:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT: [[TMP189:%.*]] = insertelement <128 x i8> [[TMP187]], i8 [[TMP188]], i32 14 +; OPT-NEXT: [[TMP190:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT: [[TMP191:%.*]] = insertelement <128 x i8> [[TMP189]], i8 [[TMP190]], i32 15 +; OPT-NEXT: [[TMP192:%.*]] = extractelement <16 x i8> [[IN]], i64 0 +; OPT-NEXT: [[TMP193:%.*]] = insertelement <128 x i8> [[TMP191]], i8 [[TMP192]], i32 0 +; OPT-NEXT: [[TMP194:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT: [[TMP195:%.*]] = insertelement <128 x i8> [[TMP193]], i8 [[TMP194]], i32 1 +; OPT-NEXT: [[TMP196:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT: [[TMP197:%.*]] = insertelement <128 x i8> [[TMP195]], i8 [[TMP196]], i32 2 +; OPT-NEXT: [[TMP198:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT: [[TMP199:%.*]] = insertelement <128 x i8> [[TMP197]], i8 [[TMP198]], i32 3 +; OPT-NEXT: [[TMP200:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT: [[TMP201:%.*]] = insertelement <128 x i8> [[TMP199]], i8 [[TMP200]], i32 4 +; OPT-NEXT: [[TMP202:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT: [[TMP203:%.*]] = insertelement <128 x i8> [[TMP201]], i8 [[TMP202]], i32 5 +; OPT-NEXT: [[TMP204:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT: [[TMP205:%.*]] = insertelement <128 x i8> [[TMP203]], i8 [[TMP204]], i32 6 +; OPT-NEXT: [[TMP206:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT: [[TMP207:%.*]] = insertelement <128 x i8> [[TMP205]], i8 [[TMP206]], i32 7 +; OPT-NEXT: [[TMP208:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT: [[TMP209:%.*]] = insertelement <128 
x i8> [[TMP207]], i8 [[TMP208]], i32 8 +; OPT-NEXT: [[TMP210:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT: [[TMP211:%.*]] = insertelement <128 x i8> [[TMP209]], i8 [[TMP210]], i32 9 +; OPT-NEXT: [[TMP212:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT: [[TMP213:%.*]] = insertelement <128 x i8> [[TMP211]], i8 [[TMP212]], i32 10 +; OPT-NEXT: [[TMP214:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT: [[TMP215:%.*]] = insertelement <128 x i8> [[TMP213]], i8 [[TMP214]], i32 11 +; OPT-NEXT: [[TMP216:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT: [[TMP217:%.*]] = insertelement <128 x i8> [[TMP215]], i8 [[TMP216]], i32 12 +; OPT-NEXT: [[TMP218:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT: [[TMP219:%.*]] = insertelement <128 x i8> [[TMP217]], i8 [[TMP218]], i32 13 +; OPT-NEXT: [[TMP220:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT: [[TMP221:%.*]] = insertelement <128 x i8> [[TMP219]], i8 [[TMP220]], i32 14 +; OPT-NEXT: [[TMP222:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT: [[TMP223:%.*]] = insertelement <128 x i8> [[TMP221]], i8 [[TMP222]], i32 15 +; OPT-NEXT: [[TMP224:%.*]] = extractelement <16 x i8> [[IN]], i64 0 +; OPT-NEXT: [[TMP225:%.*]] = insertelement <128 x i8> [[TMP223]], i8 [[TMP224]], i32 0 +; OPT-NEXT: [[TMP226:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT: [[TMP227:%.*]] = insertelement <128 x i8> [[TMP225]], i8 [[TMP226]], i32 1 +; OPT-NEXT: [[TMP228:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT: [[TMP229:%.*]] = insertelement <128 x i8> [[TMP227]], i8 [[TMP228]], i32 2 +; OPT-NEXT: [[TMP230:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT: [[TMP231:%.*]] = insertelement <128 x i8> [[TMP229]], i8 [[TMP230]], i32 3 +; OPT-NEXT: [[TMP232:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT: [[TMP233:%.*]] = insertelement <128 x i8> [[TMP231]], i8 [[TMP232]], i32 4 +; OPT-NEXT: [[TMP234:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT: [[TMP235:%.*]] = insertelement <128 x i8> [[TMP233]], i8 [[TMP234]], i32 5 +; OPT-NEXT: [[TMP236:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT: [[TMP237:%.*]] = insertelement <128 x i8> [[TMP235]], i8 [[TMP236]], i32 6 +; OPT-NEXT: [[TMP238:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT: [[TMP239:%.*]] = insertelement <128 x i8> [[TMP237]], i8 [[TMP238]], i32 7 +; OPT-NEXT: [[TMP240:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT: [[TMP241:%.*]] = insertelement <128 x i8> [[TMP239]], i8 [[TMP240]], i32 8 +; OPT-NEXT: [[TMP242:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT: [[TMP243:%.*]] = insertelement <128 x i8> [[TMP241]], i8 [[TMP242]], i32 9 +; OPT-NEXT: [[TMP244:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT: [[TMP245:%.*]] = insertelement <128 x i8> [[TMP243]], i8 [[TMP244]], i32 10 +; OPT-NEXT: [[TMP246:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT: [[TMP247:%.*]] = insertelement <128 x i8> [[TMP245]], i8 [[TMP246]], i32 11 +; OPT-NEXT: [[TMP248:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT: [[TMP249:%.*]] = insertelement <128 x i8> [[TMP247]], i8 [[TMP248]], i32 12 +; OPT-NEXT: [[TMP250:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT: [[TMP251:%.*]] = insertelement <128 x i8> [[TMP249]], i8 [[TMP250]], i32 13 +; OPT-NEXT: [[TMP252:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT: [[TMP253:%.*]] = insertelement <128 x i8> [[TMP251]], i8 [[TMP252]], i32 14 +; OPT-NEXT: [[TMP254:%.*]] = 
extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT: [[TMP255:%.*]] = insertelement <128 x i8> [[TMP253]], i8 [[TMP254]], i32 15 +; OPT-NEXT: [[TMP256:%.*]] = extractelement <128 x i8> [[TMP255]], i32 80 +; OPT-NEXT: [[TMP257:%.*]] = insertelement <16 x i8> poison, i8 [[TMP256]], i64 0 +; OPT-NEXT: [[TMP258:%.*]] = extractelement <128 x i8> [[TMP255]], i32 81 +; OPT-NEXT: [[TMP259:%.*]] = insertelement <16 x i8> [[TMP257]], i8 [[TMP258]], i64 1 +; OPT-NEXT: [[TMP260:%.*]] = extractelement <128 x i8> [[TMP255]], i32 82 +; OPT-NEXT: [[TMP261:%.*]] = insertelement <16 x i8> [[TMP259]], i8 [[TMP260]], i64 2 +; OPT-NEXT: [[TMP262:%.*]] = extractelement <128 x i8> [[TMP255]], i32 83 +; OPT-NEXT: [[TMP263:%.*]] = insertelement <16 x i8> [[TMP261]], i8 [[TMP262]], i64 3 +; OPT-NEXT: [[TMP264:%.*]] = extractelement <128 x i8> [[TMP255]], i32 84 +; OPT-NEXT: [[TMP265:%.*]] = insertelement <16 x i8> [[TMP263]], i8 [[TMP264]], i64 4 +; OPT-NEXT: [[TMP266:%.*]] = extractelement <128 x i8> [[TMP255]], i32 85 +; OPT-NEXT: [[TMP267:%.*]] = insertelement <16 x i8> [[TMP265]], i8 [[TMP266]], i64 5 +; OPT-NEXT: [[TMP268:%.*]] = extractelement <128 x i8> [[TMP255]], i32 86 +; OPT-NEXT: [[TMP269:%.*]] = insertelement <16 x i8> [[TMP267]], i8 [[TMP268]], i64 6 +; OPT-NEXT: [[TMP270:%.*]] = extractelement <128 x i8> [[TMP255]], i32 87 +; OPT-NEXT: [[TMP271:%.*]] = insertelement <16 x i8> [[TMP269]], i8 [[TMP270]], i64 7 +; OPT-NEXT: [[TMP272:%.*]] = extractelement <128 x i8> [[TMP255]], i32 88 +; OPT-NEXT: [[TMP273:%.*]] = insertelement <16 x i8> [[TMP271]], i8 [[TMP272]], i64 8 +; OPT-NEXT: [[TMP274:%.*]] = extractelement <128 x i8> [[TMP255]], i32 89 +; OPT-NEXT: [[TMP275:%.*]] = insertelement <16 x i8> [[TMP273]], i8 [[TMP274]], i64 9 +; OPT-NEXT: [[TMP276:%.*]] = extractelement <128 x i8> [[TMP255]], i32 90 +; OPT-NEXT: [[TMP277:%.*]] = insertelement <16 x i8> [[TMP275]], i8 [[TMP276]], i64 10 +; OPT-NEXT: [[TMP278:%.*]] = extractelement <128 x i8> [[TMP255]], i32 91 +; OPT-NEXT: [[TMP279:%.*]] = insertelement <16 x i8> [[TMP277]], i8 [[TMP278]], i64 11 +; OPT-NEXT: [[TMP280:%.*]] = extractelement <128 x i8> [[TMP255]], i32 92 +; OPT-NEXT: [[TMP281:%.*]] = insertelement <16 x i8> [[TMP279]], i8 [[TMP280]], i64 12 +; OPT-NEXT: [[TMP282:%.*]] = extractelement <128 x i8> [[TMP255]], i32 93 +; OPT-NEXT: [[TMP283:%.*]] = insertelement <16 x i8> [[TMP281]], i8 [[TMP282]], i64 13 +; OPT-NEXT: [[TMP284:%.*]] = extractelement <128 x i8> [[TMP255]], i32 94 +; OPT-NEXT: [[TMP285:%.*]] = insertelement <16 x i8> [[TMP283]], i8 [[TMP284]], i64 14 +; OPT-NEXT: [[TMP286:%.*]] = extractelement <128 x i8> [[TMP255]], i32 95 +; OPT-NEXT: [[TMP287:%.*]] = insertelement <16 x i8> [[TMP285]], i8 [[TMP286]], i64 15 +; OPT-NEXT: [[SUM:%.*]] = add <16 x i8> [[TMP287]], [[ADD]] +; OPT-NEXT: store <16 x i8> [[SUM]], ptr addrspace(3) [[OUT]], align 16 +; OPT-NEXT: ret void +; +entry: + %alloca = alloca [8 x <16 x i8>], align 16, addrspace(5) + %gep0 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 0 + store <16 x i8> %in, ptr addrspace(5) %gep0, align 16 + %gep1 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 1 + store <16 x i8> %in, ptr addrspace(5) %gep0, align 16 + %gep2 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 2 + store <16 x i8> %in, ptr addrspace(5) %gep0, align 16 + %gep3 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 3 + store <16 x i8> %in, ptr addrspace(5) %gep0, align 16 + %gep4 = getelementptr [8 x <16 x i8>], ptr addrspace(5) 
%alloca, i64 0, i64 4 + store <16 x i8> %in, ptr addrspace(5) %gep0, align 16 + %gep5 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 5 + store <16 x i8> %in, ptr addrspace(5) %gep0, align 16 + %gep6 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 6 + store <16 x i8> %in, ptr addrspace(5) %gep0, align 16 + %gep7 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 7 + store <16 x i8> %in, ptr addrspace(5) %gep0, align 16 + %load = load <16 x i8>, ptr addrspace(5) %gep5, align 16 + %sum = add <16 x i8> %load, %add + store <16 x i8> %sum, ptr addrspace(3) %out, align 16 + ret void +} + +attributes #0 = {"amdgpu-waves-per-eu"="2,2"} diff --git a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll index f67cbe3..ddb522a8 100644 --- a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll +++ b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll @@ -1,17 +1,17 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s ; -global-isel=1 SI run line skipped since store not yet implemented. ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=MEMTIME -check-prefix=GCN %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=MEMTIME -check-prefix=GCN %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=MEMTIME -check-prefix=GCN %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=MEMTIME -check-prefix=GCN %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=MEMTIME -check-prefix=GCN %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=MEMTIME -check-prefix=GCN %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250 %s declare i64 @llvm.readcyclecounter() #0 diff --git 
a/llvm/test/CodeGen/AMDGPU/umin-sub-to-usubo-select-combine.ll b/llvm/test/CodeGen/AMDGPU/umin-sub-to-usubo-select-combine.ll new file mode 100644 index 0000000..22e4a24 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/umin-sub-to-usubo-select-combine.ll @@ -0,0 +1,236 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s + +define i16 @v_underflow_compare_fold_i16(i16 %a, i16 %b) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_u16_e32 v1, v0, v1 +; GFX9-NEXT: v_min_u16_e32 v0, v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_nc_u16 v0.h, v0.l, v1.l +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_min_u16 v0.l, v0.h, v0.l +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i16 %a, %b + %cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a) + ret i16 %cond +} + +define i32 @v_underflow_compare_fold_i32(i32 %a, i32 %b) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1 +; GFX9-NEXT: v_min_u32_e32 v0, v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_min_u32_e32 v0, v1, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i32 %a, %b + %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a) + ret i32 %cond +} + +define i32 @v_underflow_compare_fold_i32_commute(i32 %a, i32 %b) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i32_commute: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1 +; GFX9-NEXT: v_min_u32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i32_commute: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_min_u32_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i32 %a, %b + %cond = call i32 @llvm.umin.i32(i32 %a, i32 %sub) + ret i32 %cond +} + +define i32 @v_underflow_compare_fold_i32_multi_use(i32 %a, i32 %b, ptr addrspace(1) %ptr) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i32_multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1 +; GFX9-NEXT: v_min_u32_e32 v0, v1, v0 +; GFX9-NEXT: global_store_dword v[2:3], v1, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i32_multi_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_min_u32_e32 v0, v1, v0 +; GFX11-NEXT: global_store_b32 v[2:3], v1, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i32 %a, %b + store i32 %sub, ptr addrspace(1) %ptr + %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a) + ret i32 %cond +} + +define i64 
@v_underflow_compare_fold_i64(i64 %a, i64 %b) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc +; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo +; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1] +; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i64 %a, %b + %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a) + ret i64 %cond +} + +define i64 @v_underflow_compare_fold_i64_commute(i64 %a, i64 %b) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i64_commute: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc +; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i64_commute: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo +; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v3, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i64 %a, %b + %cond = call i64 @llvm.umin.i64(i64 %a, i64 %sub) + ret i64 %cond +} + +define i64 @v_underflow_compare_fold_i64_multi_use(i64 %a, i64 %b, ptr addrspace(1) %ptr) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i64_multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc +; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] +; GFX9-NEXT: global_store_dwordx2 v[4:5], v[2:3], off +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i64_multi_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo +; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1] +; GFX11-NEXT: global_store_b64 v[4:5], v[2:3], off +; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i64 %a, %b + store i64 %sub, ptr addrspace(1) %ptr + %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a) + ret i64 %cond +} + +define amdgpu_ps i16 @s_underflow_compare_fold_i16(i16 inreg %a, i16 inreg %b) #0 { +; GFX9-LABEL: s_underflow_compare_fold_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_sub_i32 s1, 
s0, s1 +; GFX9-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX9-NEXT: s_and_b32 s1, s1, 0xffff +; GFX9-NEXT: s_min_u32 s0, s1, s0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: s_underflow_compare_fold_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_sub_i32 s1, s0, s1 +; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX11-NEXT: s_and_b32 s1, s1, 0xffff +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_min_u32 s0, s1, s0 +; GFX11-NEXT: ; return to shader part epilog + %sub = sub i16 %a, %b + %cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a) + ret i16 %cond +} + +define amdgpu_ps i32 @s_underflow_compare_fold_i32(i32 inreg %a, i32 inreg %b) #0 { +; GFX9-LABEL: s_underflow_compare_fold_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_sub_i32 s1, s0, s1 +; GFX9-NEXT: s_min_u32 s0, s1, s0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: s_underflow_compare_fold_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_sub_i32 s1, s0, s1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_min_u32 s0, s1, s0 +; GFX11-NEXT: ; return to shader part epilog + %sub = sub i32 %a, %b + %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a) + ret i32 %cond +} + +define amdgpu_ps i64 @s_underflow_compare_fold_i64(i64 inreg %a, i64 inreg %b) #0 { +; GFX9-LABEL: s_underflow_compare_fold_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_sub_u32 s2, s0, s2 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: s_subb_u32 s3, s1, s3 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1] +; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec +; GFX9-NEXT: s_cselect_b32 s1, s3, s1 +; GFX9-NEXT: s_cselect_b32 s0, s2, s0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: s_underflow_compare_fold_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_sub_u32 s2, s0, s2 +; GFX11-NEXT: s_subb_u32 s3, s1, s3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[2:3], s[0:1] +; GFX11-NEXT: s_and_b32 s4, s4, exec_lo +; GFX11-NEXT: s_cselect_b32 s0, s2, s0 +; GFX11-NEXT: s_cselect_b32 s1, s3, s1 +; GFX11-NEXT: ; return to shader part epilog + %sub = sub i64 %a, %b + %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a) + ret i64 %cond +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir index 8a70a8a..32cc398 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir @@ -36,7 +36,7 @@ body: | ; GCN-NEXT: v_add_f16_e64 v128.l /*v384.l*/, v129.l /*v385.l*/, v130.l /*v386.l*/ $vgpr384_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr385_lo16, 0, undef $vgpr386_lo16, 0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x8a + ; GCN-NEXT: s_set_vgpr_msb 0x458a ; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=0 ; GCN-NEXT: v_add_f16_e64 v0.h /*v512.h*/, v1.h /*v513.h*/, v2.h /*v514.h*/ $vgpr512_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr513_hi16, 0, undef $vgpr514_hi16, 0, 0, 0, implicit $exec, implicit $mode @@ -50,7 +50,7 @@ body: | ; GCN-NEXT: v_add_f16_e64 v128.l /*v640.l*/, v129.l /*v641.l*/, v130.l /*v642.l*/ $vgpr640_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr641_lo16, 0, undef $vgpr642_lo16, 0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0xcf + ; GCN-NEXT: s_set_vgpr_msb 0x8acf ; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=0 ; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/ $vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, 
implicit $exec, implicit $mode diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir index f508df2..7e1c28f 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir @@ -22,13 +22,13 @@ body: | $vgpr257 = V_MOV_B32_e32 undef $vgpr510, implicit $exec ; Single bit change - ; GCN-NEXT: s_set_vgpr_msb 1 + ; GCN-NEXT: s_set_vgpr_msb 0x4101 ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 ; GCN-NEXT: v_rcp_f32_e64 v255, v2 /*v258*/ $vgpr255 = V_RCP_F32_e64 0, undef $vgpr258, 0, 0, implicit $exec, implicit $mode ; Reset - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x100 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: v_rcp_f32_e64 v255, v1 $vgpr255 = V_RCP_F32_e64 0, undef $vgpr1, 0, 0, implicit $exec, implicit $mode @@ -40,7 +40,7 @@ body: | ; GCN-NEXT: v_add_nc_u32_e32 v0, v253 /*v509*/, v252 /*v508*/ $vgpr0 = V_ADD_U32_e32 undef $vgpr509, undef $vgpr508, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0x44 + ; GCN-NEXT: s_set_vgpr_msb 0x544 ; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GCN-NEXT: v_add_f32_e64 v2 /*v258*/, v0, v251 /*v507*/ @@ -48,7 +48,7 @@ body: | ; VOP3 - ; GCN-NEXT: s_set_vgpr_msb 0x55 + ; GCN-NEXT: s_set_vgpr_msb 0x4455 ; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=1 ; GCN-NEXT: v_fma_f32 v3 /*v259*/, v4 /*v260*/, v5 /*v261*/, v6 /*v262*/ $vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode @@ -58,32 +58,32 @@ body: | $vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode ; Tuple crossing the 256 boundary - ; GCN-NEXT: s_set_vgpr_msb 17 + ; GCN-NEXT: s_set_vgpr_msb 0x5511 ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=1 ; GCN-NEXT: v_mqsad_u32_u8 v[254:257], v[2:3] /*v[258:259]*/, v0, v[244:247] /*v[500:503]*/ $vgpr254_vgpr255_vgpr256_vgpr257 = V_MQSAD_U32_U8_e64 $vgpr258_vgpr259, $vgpr0, undef $vgpr500_vgpr501_vgpr502_vgpr503, 0, implicit $exec ; DPP/tied operand - ; GCN-NEXT: s_set_vgpr_msb 0x45 + ; GCN-NEXT: s_set_vgpr_msb 0x1145 ; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=0 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GCN-NEXT: v_add_nc_u16_e64_dpp v0 /*v256*/, v1 /*v257*/, v2 /*v258*/ quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 $vgpr256 = V_ADD_NC_U16_fake16_e64_dpp $vgpr256, 0, $vgpr257, 0, undef $vgpr258, 0, 0, 1, 15, 15, 1, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 17 + ; GCN-NEXT: s_set_vgpr_msb 0x4511 ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=1 ; GCN-NEXT: v_add3_u32_e64_dpp v0, v1 /*v257*/, v0, v2 /*v258*/ quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 $vgpr0 = V_ADD3_U32_e64_dpp $vgpr0, $vgpr257, $vgpr0, undef $vgpr258, 1, 15, 15, 1, implicit $exec ; DS (addr, data0, and data1 operands) - ; GCN-NEXT: s_set_vgpr_msb 20 + ; GCN-NEXT: s_set_vgpr_msb 0x1114 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=1 src2=1 ; GCN-NEXT: ds_store_2addr_b32 v0, v248 /*v504*/, v249 /*v505*/ offset1:1 DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr504, undef $vgpr505, 0, 1, 0, implicit $exec ; Reset - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x1400 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: ds_store_2addr_b32 v0, v248, v249 offset1:1 DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr248, undef $vgpr249, 0, 1, 0, implicit $exec @@ -93,13 +93,13 @@ body: | ; GCN-NEXT: ds_load_b32 v0, v255 /*v511*/ $vgpr0 = DS_READ_B32_gfx9 
$vgpr511, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0x44 + ; GCN-NEXT: s_set_vgpr_msb 0x144 ; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0 ; GCN-NEXT: ds_add_rtn_u32 v255 /*v511*/, v0, v248 /*v504*/ $vgpr511 = DS_ADD_RTN_U32_gfx9 $vgpr0, undef $vgpr504, 0, 0, implicit $exec ; Reset - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4400 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: ds_add_rtn_u32 v0, v0, v0 $vgpr0 = DS_ADD_RTN_U32_gfx9 $vgpr0, $vgpr0, 0, 0, implicit $exec @@ -111,17 +111,17 @@ body: | ; GCN-NEXT: global_load_b32 v2, v[2:3] /*v[258:259]*/, off $vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr258_vgpr259, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 64 + ; GCN-NEXT: s_set_vgpr_msb 0x140 ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 ; GCN-NEXT: global_load_b32 v255 /*v511*/, v0, s[0:1] $vgpr511 = GLOBAL_LOAD_DWORD_SADDR undef $sgpr0_sgpr1, $vgpr0, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 1 + ; GCN-NEXT: s_set_vgpr_msb 0x4001 ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 ; GCN-NEXT: scratch_load_u8 v0, v255 /*v511*/, s0 $vgpr0 = SCRATCH_LOAD_UBYTE_SVS $vgpr511, undef $sgpr0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x100 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: global_store_b32 v[0:1], v2, off GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec @@ -135,13 +135,13 @@ body: | ; GCN-NEXT: global_store_b96 v[0:1] /*v[256:257]*/, v[244:246] /*v[500:502]*/, off GLOBAL_STORE_DWORDX3 $vgpr256_vgpr257, $vgpr500_vgpr501_vgpr502, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0x44 + ; GCN-NEXT: s_set_vgpr_msb 0x544 ; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0 ; GCN-NEXT: flat_atomic_add_u32 v254 /*v510*/, v[0:1], v255 /*v511*/ th:TH_ATOMIC_RETURN $vgpr510 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr511, 0, 1, implicit $exec, implicit $flat_scr ; Reset - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4400 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: flat_atomic_add_u32 v0, v[0:1], v255 th:TH_ATOMIC_RETURN $vgpr0 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr255, 0, 1, implicit $exec, implicit $flat_scr @@ -156,12 +156,12 @@ body: | ; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0, s[8:11], s3 offen $vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr0, undef $sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0x41 + ; GCN-NEXT: s_set_vgpr_msb 0x4041 ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=0 ; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0 /*v256*/, s[8:11], s3 offen $vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr256, undef $sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4100 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: buffer_store_b32 v0, v1, s[0:3], s3 offen BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr0, $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec @@ -171,7 +171,7 @@ body: | ; GCN-NEXT: buffer_store_b32 v0 /*v256*/, v1 /*v257*/, s[0:3], s3 offen BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr256, $vgpr257, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4100 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s3 offen BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN $vgpr0, $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, implicit $exec @@ -183,44 +183,44 @@ body: | ; 
VGPRs above 512 - ; GCN-NEXT: s_set_vgpr_msb 0xaa + ; GCN-NEXT: s_set_vgpr_msb 0x41aa ; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=2 ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/ $vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0xab + ; GCN-NEXT: s_set_vgpr_msb 0xaaab ; ASM-SAME: ; msbs: dst=2 src0=3 src1=2 src2=2 ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v0 /*v768*/, v2 /*v514*/, v3 /*v515*/ $vgpr512 = V_FMA_F32_e64 0, undef $vgpr768, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0xae + ; GCN-NEXT: s_set_vgpr_msb 0xabae ; ASM-SAME: ; msbs: dst=2 src0=2 src1=3 src2=2 ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v770*/, v3 /*v515*/ $vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr770, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0xba + ; GCN-NEXT: s_set_vgpr_msb 0xaeba ; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=3 ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v771*/ $vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr771, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0xea + ; GCN-NEXT: s_set_vgpr_msb 0xbaea ; ASM-SAME: ; msbs: dst=3 src0=2 src1=2 src2=2 ; GCN-NEXT: v_fma_f32 v255 /*v1023*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/ $vgpr1023 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0xff + ; GCN-NEXT: s_set_vgpr_msb 0xeaff ; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=3 ; GCN-NEXT: v_fma_f32 v0 /*v768*/, v1 /*v769*/, v2 /*v770*/, v3 /*v771*/ $vgpr768 = V_FMA_F32_e64 0, undef $vgpr769, 0, undef $vgpr770, 0, undef $vgpr771, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x42 + ; GCN-NEXT: s_set_vgpr_msb 0xff42 ; ASM-SAME: ; msbs: dst=1 src0=2 src1=0 src2=0 ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v0 /*v512*/ $vgpr256 = V_MOV_B32_e32 undef $vgpr512, implicit $exec ; Reset - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4200 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: v_fma_f32 v0, v1, v2, v3 $vgpr0 = V_FMA_F32_e64 0, undef $vgpr1, 0, undef $vgpr2, 0, undef $vgpr3, 0, 0, implicit $exec, implicit $mode @@ -232,12 +232,12 @@ body: | ; GCN-NEXT: global_store_b96 v[0:1] /*v[512:513]*/, v[0:2] /*v[512:514]*/, off GLOBAL_STORE_DWORDX3 $vgpr512_vgpr513, $vgpr512_vgpr513_vgpr514, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 11 + ; GCN-NEXT: s_set_vgpr_msb 0xa0b ; ASM-SAME: ; msbs: dst=0 src0=3 src1=2 src2=0 ; GCN-NEXT: global_store_b64 v[254:255] /*v[1022:1023]*/, v[254:255] /*v[766:767]*/, off GLOBAL_STORE_DWORDX2 $vgpr1022_vgpr1023, $vgpr766_vgpr767, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0x55 + ; GCN-NEXT: s_set_vgpr_msb 0xb55 ; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=1 ; GCN-NEXT: v_wmma_f32_16x16x32_bf16 v[14:21] /*v[270:277]*/, v[26:33] /*v[282:289]*/, v[34:41] /*v[290:297]*/, v[14:21] /*v[270:277]*/ early-clobber $vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, undef $vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287_vgpr288_vgpr289, 8, undef $vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297, 8, killed undef $vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277, 0, 0, 0, 0, implicit $exec @@ -247,6 +247,7 @@ body: | ... 
# ASM-LABEL: {{^}}vopd: + # DIS-LABEL: <vopd>: --- name: vopd @@ -262,35 +263,35 @@ body: | ; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, v1, v2 :: v_dual_mul_f32 v0 /*v256*/, v3, v4 $vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr2, undef $vgpr3, undef $vgpr4, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0x41 + ; GCN-NEXT: s_set_vgpr_msb 0x4041 ; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, s1, v2 :: v_dual_mul_f32 v0 /*v256*/, v44 /*v300*/, v4 $vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $sgpr1, undef $vgpr2, undef $vgpr300, undef $vgpr4, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 4 + ; GCN-NEXT: s_set_vgpr_msb 0x4104 ; GCN-NEXT: v_dual_sub_f32 v255, v1, v44 /*v300*/ :: v_dual_mul_f32 v6, v0, v1 /*v257*/ $vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr300, undef $vgpr0, $vgpr257, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 1 + ; GCN-NEXT: s_set_vgpr_msb 0x401 ; GCN-NEXT: v_dual_sub_f32 v255, 0, v1 :: v_dual_mul_f32 v6, v44 /*v300*/, v3 $vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 0, undef $vgpr1, undef $vgpr300, undef $vgpr3, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 64 + ; GCN-NEXT: s_set_vgpr_msb 0x140 ; GCN-NEXT: v_dual_fmamk_f32 v243 /*v499*/, v0, 0xa, v3 :: v_dual_fmac_f32 v0 /*v256*/, v1, v1 $vgpr499, $vgpr256 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr3, undef $vgpr1, undef $vgpr1, $vgpr256, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 5 + ; GCN-NEXT: s_set_vgpr_msb 0x4005 ; GCN-NEXT: v_dual_mov_b32 v2, v3 /*v259*/ :: v_dual_add_f32 v3, v1 /*v257*/, v2 /*v258*/ $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1250 undef $vgpr259, undef $vgpr257, undef $vgpr258, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x44 + ; GCN-NEXT: s_set_vgpr_msb 0x544 ; GCN-NEXT: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ :: v_dual_fmac_f32 v3 /*v259*/, v1, v1 /*v257*/ $vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 16 + ; GCN-NEXT: s_set_vgpr_msb 0x4410 ; GCN-NEXT: v_dual_fma_f32 v0, v6, v6, v44 /*v300*/ :: v_dual_fma_f32 v1, v4, v5, v45 /*v301*/ $vgpr0, $vgpr1 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, 0, undef $vgpr300, 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $vgpr301, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x1000 ; GCN-NEXT: v_dual_fmac_f32 v2, v6, v6 :: v_dual_fma_f32 v3, v4, v5, v3 $vgpr2, $vgpr3 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, undef $vgpr2, 0, undef $vgpr4, 0, undef $vgpr5, 0, $vgpr3, implicit $mode, implicit $exec @@ -298,7 +299,7 @@ body: | ; GCN-NEXT: v_dual_fma_f32 v244 /*v500*/, v6, v7, v8 :: v_dual_add_f32 v3 /*v259*/, v4, v5 $vgpr500, $vgpr259 = V_DUAL_FMA_F32_e64_X_ADD_F32_e32_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr7, 0, undef $vgpr8, 0, undef $vgpr4, 0, undef $vgpr5, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0xae + ; GCN-NEXT: s_set_vgpr_msb 0x40ae ; GCN-NEXT: v_dual_fmac_f32 v2 /*v514*/, v6 /*v518*/, v8 /*v776*/ :: v_dual_fma_f32 v3 /*v515*/, v4 /*v516*/, v7 /*v775*/, v3 /*v515*/ $vgpr514, $vgpr515 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr518, 0, undef $vgpr776, undef $vgpr514, 0, undef $vgpr516, 0, undef $vgpr775, 0, 
$vgpr515, implicit $mode, implicit $exec @@ -319,31 +320,31 @@ body: | ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1 /*v257*/, v2 /*v258*/, 0x1 $vgpr256 = V_FMAAK_F32 undef $vgpr257, undef $vgpr258, 1, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 5 + ; GCN-NEXT: s_set_vgpr_msb 0x4505 ; GCN-NEXT: v_fmaak_f32 v0, v1 /*v257*/, v2 /*v258*/, 0x1 $vgpr0 = V_FMAAK_F32 undef $vgpr257, undef $vgpr258, 1, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x41 + ; GCN-NEXT: s_set_vgpr_msb 0x541 ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1 /*v257*/, v2, 0x1 $vgpr256 = V_FMAAK_F32 undef $vgpr257, undef $vgpr2, 1, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x44 + ; GCN-NEXT: s_set_vgpr_msb 0x4144 ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1, v2 /*v258*/, 0x1 $vgpr256 = V_FMAAK_F32 undef $vgpr1, undef $vgpr258, 1, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x45 + ; GCN-NEXT: s_set_vgpr_msb 0x4445 ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 /*v258*/ $vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 5 + ; GCN-NEXT: s_set_vgpr_msb 0x4505 ; GCN-NEXT: v_fmamk_f32 v0, v1 /*v257*/, 0x1, v2 /*v258*/ $vgpr0 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x41 + ; GCN-NEXT: s_set_vgpr_msb 0x541 ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 $vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr2, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x44 + ; GCN-NEXT: s_set_vgpr_msb 0x4144 ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1, 0x1, v2 /*v258*/ $vgpr256 = V_FMAMK_F32 undef $vgpr1, 1, undef $vgpr258, implicit $exec, implicit $mode @@ -389,15 +390,15 @@ body: | ; GCN-NEXT: v_lshlrev_b32_e64 v0, v0 /*v256*/, v2 $vgpr0 = V_LSHLREV_B32_e64 undef $vgpr256, undef $vgpr2, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 4 + ; GCN-NEXT: s_set_vgpr_msb 0x104 ; GCN-NEXT: v_lshlrev_b32_e64 v0, v1, v0 /*v256*/ $vgpr0 = V_LSHLREV_B32_e64 undef $vgpr1, undef $vgpr256, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 1 + ; GCN-NEXT: s_set_vgpr_msb 0x401 ; GCN-NEXT: v_subrev_nc_u32_e32 v0, v0 /*v256*/, v2 $vgpr0 = V_SUBREV_U32_e32 undef $vgpr256, undef $vgpr2, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 4 + ; GCN-NEXT: s_set_vgpr_msb 0x104 ; GCN-NEXT: v_subrev_nc_u32_e32 v0, v1, v0 /*v256*/ $vgpr0 = V_SUBREV_U32_e32 undef $vgpr1, undef $vgpr256, implicit $exec @@ -417,7 +418,7 @@ body: | ; GCN-NEXT: v_fma_f32 v3 /*v259*/, v4 /*v260*/, v5 /*v261*/, v6 /*v262*/ $vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x5500 ; GCN-NEXT: v_add_nc_u32_e32 v0, v1, v2 $vgpr0 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr2, implicit $exec @@ -431,7 +432,7 @@ body: | ; GCN-NEXT: v_add_nc_u32_e32 v0 /*v256*/, v1, v2 $vgpr256 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr2, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4000 ; GCN-NEXT: v_fma_f32 v3, v4, v5, s2 $vgpr3 = V_FMA_F32_e64 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $sgpr2, 0, 0, implicit $exec, implicit $mode @@ -439,17 +440,17 @@ body: | ; GCN-NEXT: v_fma_f32 v3, v4 /*v260*/, v5, 1 $vgpr3 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr5, 0, 1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 4 + ; GCN-NEXT: s_set_vgpr_msb 0x104 ; GCN-NEXT: v_mov_b32_e32 v0, v1 $vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec ; GCN-NEXT: 
v_add_nc_u32_e32 v2, v1, v3 /*v259*/ $vgpr2 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr259, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 1 + ; GCN-NEXT: s_set_vgpr_msb 0x401 ; GCN-NEXT: v_mov_b32_e32 v0, v0 /*v256*/ ; GCN-NEXT: v_add_nc_u32_e32 v1, v1 /*v257*/, v1 - ; GCN-NEXT: s_set_vgpr_msb 5 + ; GCN-NEXT: s_set_vgpr_msb 0x105 ; GCN-NEXT: v_add_nc_u32_e32 v2, v2 /*v258*/, v2 /*v258*/ $vgpr0 = V_MOV_B32_e32 undef $vgpr256, implicit $exec $vgpr1 = V_ADD_U32_e32 undef $vgpr257, undef $vgpr1, implicit $exec @@ -478,16 +479,18 @@ body: | ; ASM: .LBB{{.*_1}}: ; GCN-NEXT: s_set_vgpr_msb 64 ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 + ; GCN-NEXT: s_set_vgpr_msb 0x4000 $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec - ; No mode switch on fall through + ; Reset on fallthrough block end bb.2: ; ASM-NEXT: %bb.2: - ; GCN-NEXT: s_nop 0 - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 64 + ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 + ; GCN-NEXT: s_set_vgpr_msb 0x4000 ; GCN-NEXT: s_branch - S_NOP 0 + $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec S_BRANCH %bb.3 ; Reset mode on terminator @@ -496,7 +499,7 @@ body: | ; ASM: .LBB{{.*_3}}: ; GCN-NEXT: s_set_vgpr_msb 64 ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4000 ; GCN-NEXT: s_swap_pc_i64 $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec $exec = S_SWAPPC_B64 undef $sgpr0_sgpr1 @@ -518,7 +521,7 @@ body: | ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_set_vgpr_msb 64 ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4000 ; GCN-NEXT: s_set_pc_i64 $vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec @@ -538,7 +541,7 @@ body: | ; ASM-NEXT: %bb.7: ; GCN-NEXT: s_set_vgpr_msb 64 ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4000 ; ASM-NEXT: ; return to shader part epilog $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec SI_RETURN_TO_EPILOG undef $vgpr0, implicit-def $exec @@ -556,7 +559,7 @@ body: | ; ASM-NEXT: %bb.9: ; GCN-NEXT: s_set_vgpr_msb 64 ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4000 ; GCN-NEXT: s_set_pc_i64 $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec S_SETPC_B64_return undef $sgpr0_sgpr1, implicit-def $exec @@ -574,13 +577,14 @@ body: | ; ASM: %bb.0: ; GCN-NEXT: s_set_vgpr_msb 64 ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v0 + ; GCN-NEXT: s_set_vgpr_msb 0x4000 $vgpr256 = V_MOV_B32_e32 undef $vgpr0, implicit $exec bb.1: ; ASM: .LBB{{[0-9]+}}_1: ; GCN-NEXT: s_set_vgpr_msb 64 ; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v1 - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4000 ; GCN-NEXT: s_cbranch_scc0 $vgpr257 = V_MOV_B32_e32 undef $vgpr1, implicit $exec S_CBRANCH_SCC0 %bb.1, undef implicit $scc @@ -604,7 +608,7 @@ body: | ; ASM: %bb.0: ; GCN-NEXT: s_set_vgpr_msb 64 ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4000 ; ASM: def v0 ; GCN-NOT: s_set_vgpr_msb ; ASM: use v0 @@ -638,7 +642,7 @@ body: | ; GCN-NEXT: s_set_vgpr_msb 64 ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 ; GCN-NEXT: s_nop 0 - ; GCN-NEXT: s_set_vgpr_msb 1 + ; GCN-NEXT: s_set_vgpr_msb 0x4001 ; GCN-NEXT: v_mov_b32_e32 v1, v0 /*v256*/ BUNDLE implicit-def $vgpr256 { $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec @@ -680,7 +684,7 @@ body: | ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 ; GCN-NEXT: v_mov_b32_e32 v1 
/*v257*/, v1 - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4000 ; GCN-NEXT: v_mov_b32_e32 v2, v1 ; GCN-NEXT: v_mov_b32_e32 v3, v1 BUNDLE implicit-def $vgpr256, implicit-def $vgpr257, implicit-def $vgpr2, implicit-def $vgpr3, implicit undef $vgpr1 { @@ -709,7 +713,7 @@ body: | ; GCN-NEXT: s_clause 0x3e ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4000 ; GCN-NEXT: v_mov_b32_e32 v1, v1 ; GCN-NEXT: v_mov_b32_e32 v2, v1 ; GCN-COUNT-60: v_mov_b32_e32 v1, v1 @@ -823,7 +827,7 @@ body: | ; GCN-NEXT: v_wmma_scale_f32_16x16x128_f8f6f4 v[210:217], v[244:259] /*v[500:515]*/, v[244:259] /*v[500:515]*/, v[10:17], v1, v2 $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr1, undef $vgpr2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x500 ; GCN-NEXT: v_wmma_scale_f32_16x16x128_f8f6f4 v[210:217], v[100:115], v[100:115], v[10:17], v1, v2 $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr1, undef $vgpr2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec @@ -835,11 +839,11 @@ body: | ; GCN-NEXT: v_wmma_ld_scale16_paired_b64 v[0:1], v[2:3] V_WMMA_LD_SCALE16_PAIRED_B64 undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 5 + ; GCN-NEXT: s_set_vgpr_msb 0x105 ; GCN-NEXT: v_wmma_scale16_f32_16x16x128_f8f6f4 v[210:217], v[244:259] /*v[500:515]*/, v[244:259] /*v[500:515]*/, v[10:17], v[0:1], v[2:3] $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x500 ; GCN-NEXT: v_wmma_scale16_f32_16x16x128_f8f6f4 v[210:217], v[100:115], v[100:115], v[10:17], v[0:1], v[2:3] $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/wait-xcnt.mir b/llvm/test/CodeGen/AMDGPU/wait-xcnt.mir index 1b8e126..a1381ec 100644 --- a/llvm/test/CodeGen/AMDGPU/wait-xcnt.mir +++ b/llvm/test/CodeGen/AMDGPU/wait-xcnt.mir @@ -945,7 +945,6 @@ body: | $vgpr0 = V_MOV_B32_e32 0, implicit $exec ... -# FIXME: Missing S_WAIT_XCNT before overwriting vgpr0. --- name: wait_kmcnt_with_outstanding_vmem_2 tracksRegLiveness: true @@ -971,6 +970,7 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2 + ; GCN-NEXT: S_WAIT_XCNT 0 ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec bb.0: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc @@ -986,6 +986,180 @@ body: | ... --- +name: wait_kmcnt_and_wait_loadcnt +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GCN-LABEL: name: wait_kmcnt_and_wait_loadcnt + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: liveins: $sgpr2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_WAIT_KMCNT 0 + ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2 + ; GCN-NEXT: S_WAIT_LOADCNT 0 + ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec + bb.0: + liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 + S_CBRANCH_SCC1 %bb.2, implicit $scc + bb.1: + liveins: $vgpr0_vgpr1, $sgpr2 + $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec + bb.2: + liveins: $sgpr2 + $sgpr2 = S_MOV_B32 $sgpr2 + $vgpr2 = V_MOV_B32_e32 0, implicit $exec +... + +--- +name: implicit_handling_of_pending_vmem_group +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GCN-LABEL: name: implicit_handling_of_pending_vmem_group + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_WAIT_KMCNT 0 + ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2 + ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 + ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: S_WAIT_XCNT 0 + ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0 + bb.0: + liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 + S_CBRANCH_SCC1 %bb.2, implicit $scc + bb.1: + liveins: $vgpr0_vgpr1, $sgpr2 + $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec + bb.2: + liveins: $sgpr0_sgpr1, $sgpr2 + $sgpr2 = S_MOV_B32 $sgpr2 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + $sgpr0 = S_MOV_B32 $sgpr0 +... 
+ +--- +name: pending_vmem_event_between_block +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GCN-LABEL: name: pending_vmem_event_between_block + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec + ; GCN-NEXT: $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_WAIT_KMCNT 0 + ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2 + ; GCN-NEXT: S_WAIT_XCNT 1 + ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: S_WAIT_XCNT 0 + ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0 + bb.0: + liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 + S_CBRANCH_SCC1 %bb.2, implicit $scc + bb.1: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2 + $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec + $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec + bb.2: + liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2 + $sgpr2 = S_MOV_B32 $sgpr2 + $vgpr1 = V_MOV_B32_e32 0, implicit $exec + $vgpr2 = V_MOV_B32_e32 0, implicit $exec + $sgpr0 = S_MOV_B32 $sgpr0 +... + +--- +name: flushing_vmem_cnt_on_block_entry +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GCN-LABEL: name: flushing_vmem_cnt_on_block_entry + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec + ; GCN-NEXT: $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_WAIT_XCNT 0 + ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0 + bb.0: + liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 + S_CBRANCH_SCC1 %bb.2, implicit $scc + bb.1: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2 + $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec + $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec + bb.2: + liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2 + $vgpr1 = V_MOV_B32_e32 0, implicit $exec + $vgpr2 = V_MOV_B32_e32 0, implicit $exec + $sgpr0 = S_MOV_B32 $sgpr0 +... 
+ +--- name: wait_loadcnt_with_outstanding_smem tracksRegLiveness: true machineFunctionInfo: diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll index a42c8ac7..7581710 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll @@ -3182,7 +3182,7 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v509*/, s33 offset:1592 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v510*/, s33 offset:1596 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v511*/, s33 offset:1600 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x408 ; msbs: dst=0 src0=0 src1=2 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v512*/, s33 offset:1604 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v513*/, s33 offset:1608 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v514*/, s33 offset:1612 @@ -3443,7 +3443,7 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v765*/, s33 offset:2616 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v766*/, s33 offset:2620 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v767*/, s33 offset:2624 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80c ; msbs: dst=0 src0=0 src1=3 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v768*/, s33 offset:2628 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v769*/, s33 offset:2632 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v770*/, s33 offset:2636 @@ -3706,7 +3706,7 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v1023*/, s33 offset:3648 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc00 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s0, 3 @@ -4135,7 +4135,7 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v253 /*v509*/, off, s33 offset:1592 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v254 /*v510*/, off, s33 offset:1596 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v511*/, off, s33 offset:1600 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80 ; msbs: dst=2 src0=0 src1=0 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x4080 ; msbs: dst=2 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0 /*v512*/, off, s33 offset:1604 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1 /*v513*/, off, s33 offset:1608 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2 /*v514*/, off, s33 offset:1612 @@ -4396,7 +4396,7 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v253 /*v765*/, off, s33 offset:2616 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v254 /*v766*/, off, s33 offset:2620 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v767*/, off, s33 offset:2624 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc0 ; msbs: dst=3 src0=0 src1=0 src2=0 +; GFX1250-DAGISEL-NEXT: 
s_set_vgpr_msb 0x80c0 ; msbs: dst=3 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0 /*v768*/, off, s33 offset:2628 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1 /*v769*/, off, s33 offset:2632 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2 /*v770*/, off, s33 offset:2636 @@ -4661,7 +4661,7 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, s4 ; GFX1250-DAGISEL-NEXT: s_mov_b32 s33, s0 ; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc000 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31] %ret = call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent ret <2 x half> %ret @@ -6346,7 +6346,7 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v509*/, s32 offset:1588 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v510*/, s32 offset:1592 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v511*/, s32 offset:1596 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x408 ; msbs: dst=0 src0=0 src1=2 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v512*/, s32 offset:1600 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v513*/, s32 offset:1604 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v514*/, s32 offset:1608 @@ -6607,7 +6607,7 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v765*/, s32 offset:2612 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v766*/, s32 offset:2616 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v767*/, s32 offset:2620 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80c ; msbs: dst=0 src0=0 src1=3 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v768*/, s32 offset:2624 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v769*/, s32 offset:2628 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v770*/, s32 offset:2632 @@ -6872,7 +6872,7 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; GFX1250-DAGISEL-NEXT: v_mov_b32_e32 v2, v0 ; GFX1250-DAGISEL-NEXT: s_mov_b64 s[36:37], gfx_callee@abs64 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc00 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: v_swap_b32 v0, v1 ; GFX1250-DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e @@ -7283,7 +7283,7 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v253 /*v509*/, off, s32 offset:1588 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v254 /*v510*/, off, s32 offset:1592 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v511*/, off, s32 offset:1596 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80 ; msbs: dst=2 src0=0 src1=0 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x4080 ; msbs: dst=2 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0 /*v512*/, off, s32 offset:1600 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1 /*v513*/, off, s32 offset:1604 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2 /*v514*/, off, s32 
offset:1608 @@ -7544,7 +7544,7 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v253 /*v765*/, off, s32 offset:2612 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v254 /*v766*/, off, s32 offset:2616 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v767*/, off, s32 offset:2620 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc0 ; msbs: dst=3 src0=0 src1=0 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80c0 ; msbs: dst=3 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0 /*v768*/, off, s32 offset:2624 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1 /*v769*/, off, s32 offset:2628 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2 /*v770*/, off, s32 offset:2632 @@ -7807,7 +7807,7 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v1023*/, off, s32 offset:3644 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, s0 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc000 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[36:37] %ret = tail call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent ret <2 x half> %ret @@ -9657,7 +9657,7 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v509*/, s33 offset:1600 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v510*/, s33 offset:1604 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v511*/, s33 offset:1608 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x408 ; msbs: dst=0 src0=0 src1=2 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v512*/, s33 offset:1612 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v513*/, s33 offset:1616 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v514*/, s33 offset:1620 @@ -9918,7 +9918,7 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v765*/, s33 offset:2624 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v766*/, s33 offset:2628 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v767*/, s33 offset:2632 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80c ; msbs: dst=0 src0=0 src1=3 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v768*/, s33 offset:2636 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v769*/, s33 offset:2640 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v770*/, s33 offset:2644 @@ -10181,7 +10181,7 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v1023*/, s33 offset:3656 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc00 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: s_clause 0x2 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42, s33 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40, s33 offset:164 @@ -10616,7 +10616,7 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v253 /*v509*/, off, s33 offset:1600 
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v254 /*v510*/, off, s33 offset:1604 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v511*/, off, s33 offset:1608 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80 ; msbs: dst=2 src0=0 src1=0 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x4080 ; msbs: dst=2 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0 /*v512*/, off, s33 offset:1612 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1 /*v513*/, off, s33 offset:1616 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2 /*v514*/, off, s33 offset:1620 @@ -10877,7 +10877,7 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v253 /*v765*/, off, s33 offset:2624 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v254 /*v766*/, off, s33 offset:2628 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v767*/, off, s33 offset:2632 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc0 ; msbs: dst=3 src0=0 src1=0 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80c0 ; msbs: dst=3 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0 /*v768*/, off, s33 offset:2636 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1 /*v769*/, off, s33 offset:2640 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2 /*v770*/, off, s33 offset:2644 @@ -11142,7 +11142,7 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, s4 ; GFX1250-DAGISEL-NEXT: s_mov_b32 s33, s0 ; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc000 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31] %ret = call float(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, <8 x float> %x) convergent store float %ret, ptr %p diff --git a/llvm/test/CodeGen/Hexagon/isel-fclass.ll b/llvm/test/CodeGen/Hexagon/isel-fclass.ll new file mode 100644 index 0000000..96b0210 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/isel-fclass.ll @@ -0,0 +1,86 @@ +; Tests lowering of sfclass/dfclass compares. 
+; Sub-optimal code +; { +; p0 = sfclass(r0,#16) +; r0 = sfadd(r0,r0) +; } +; { +; r2 = p0 +; } +; { +; if (p0.new) r0 = ##1065353216 +; p0 = cmp.eq(r2,#0) +; jumpr r31 +; } +; With the patterns added, we should be generating +; { +; p0 = sfclass(r0,#16) +; r0 = sfadd(r0,r0) +; } +; { +; if (!p0) r0 = ##1065353216 +; jumpr r31 +; } + +; RUN: llc -march=hexagon -stop-after=hexagon-isel %s -o - | FileCheck %s + +; CHECK: bb.0.entry1 +; CHECK: F2_sfclass +; CHECK-NOT: C2_cmp +; CHECK: C2_not +; CHECK: F2_sfadd +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +define float @test1(float noundef %x) { +entry1: + %0 = tail call i32 @llvm.hexagon.F2.sfclass(float %x, i32 16) + %tobool.not = icmp eq i32 %0, 0 + %add = fadd float %x, %x + %spec.select = select i1 %tobool.not, float 1.000000e+00, float %add + ret float %spec.select +} + +; CHECK: bb.0.entry2 +; CHECK: F2_sfclass +; CHECK-NOT: C2_cmp +; CHECK: F2_sfadd +define float @test2(float noundef %x) { +entry2: + %0 = tail call i32 @llvm.hexagon.F2.sfclass(float %x, i32 16) + %tobool.not = icmp eq i32 %0, 0 + %add = fadd float %x, %x + %spec.select = select i1 %tobool.not, float %add, float 1.000000e+00 + ret float %spec.select +} + +; CHECK: bb.0.entry3 +; CHECK: F2_dfclass +; CHECK-NOT: C2_cmp +; CHECK: C2_not +; CHECK: F2_dfadd +define double @test3(double noundef %x) { +entry3: + %0 = tail call i32 @llvm.hexagon.F2.dfclass(double %x, i32 16) + %tobool.not = icmp eq i32 %0, 0 + %add = fadd double %x, %x + %spec.select = select i1 %tobool.not, double 1.000000e+00, double %add + ret double %spec.select +} + +; CHECK: bb.0.entry4 +; CHECK: F2_dfclass +; CHECK-NOT: C2_cmp +; CHECK: F2_dfadd +define double @test4(double noundef %x) { +entry4: + %0 = tail call i32 @llvm.hexagon.F2.dfclass(double %x, i32 16) + %tobool.not = icmp eq i32 %0, 0 + %add = fadd double %x, %x + %spec.select = select i1 %tobool.not, double %add, double 1.000000e+00 + ret double %spec.select +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare i32 @llvm.hexagon.F2.dfclass(double, i32 immarg) + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare i32 @llvm.hexagon.F2.sfclass(float, i32 immarg) diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll index ba2118f..b3155c9 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll @@ -106,6 +106,69 @@ define void @ctlz_v4i64(ptr %src, ptr %dst) nounwind { ret void } +define void @not_ctlz_v32i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvxori.b $xr0, $xr0, 255 +; CHECK-NEXT: xvclz.b $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <32 x i8>, ptr %src + %neg = xor <32 x i8> %v, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %neg, i1 false) + store <32 x i8> %res, ptr %dst + ret void +} + +define void @not_ctlz_v16i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvrepli.b $xr1, -1 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvclz.h $xr0, $xr0 +; CHECK-NEXT: 
xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i16>, ptr %src + %neg = xor <16 x i16> %v, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %neg, i1 false) + store <16 x i16> %res, ptr %dst + ret void +} + +define void @not_ctlz_v8i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvrepli.b $xr1, -1 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvclz.w $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i32>, ptr %src + %neg = xor <8 x i32> %v, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %neg, i1 false) + store <8 x i32> %res, ptr %dst + ret void +} + +define void @not_ctlz_v4i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvrepli.b $xr1, -1 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvclz.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i64>, ptr %src + %neg = xor <4 x i64> %v, <i64 -1, i64 -1, i64 -1, i64 -1> + %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %neg, i1 false) + store <4 x i64> %res, ptr %dst + ret void +} + declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>) declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll index 79407c3..fa5f27e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll @@ -7,38 +7,8 @@ define void @ceil_v8f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: ceil_v8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrp.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 48 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0 -; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0 -; CHECK-NEXT: vfrintrp.s $vr3, $vr3 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrp.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2 -; CHECK-NEXT: xvst $xr3, $a0, 0 +; CHECK-NEXT: xvfrintrp.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 @@ -52,21 +22,7 @@ define void @ceil_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: ceil_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3 -; CHECK-NEXT: 
vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2 -; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrp.d $vr2, $vr2 -; CHECK-NEXT: vextrins.d $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrp.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT: xvfrintrp.d $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -81,38 +37,8 @@ define void @floor_v8f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: floor_v8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrm.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 48 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0 -; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0 -; CHECK-NEXT: vfrintrm.s $vr3, $vr3 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrm.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2 -; CHECK-NEXT: xvst $xr3, $a0, 0 +; CHECK-NEXT: xvfrintrm.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 @@ -126,21 +52,7 @@ define void @floor_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: floor_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2 -; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrm.d $vr2, $vr2 -; CHECK-NEXT: vextrins.d $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrm.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT: xvfrintrm.d $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -155,38 +67,8 @@ define void @trunc_v8f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: trunc_v8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrz.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: 
vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 48 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0 -; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0 -; CHECK-NEXT: vfrintrz.s $vr3, $vr3 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrz.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2 -; CHECK-NEXT: xvst $xr3, $a0, 0 +; CHECK-NEXT: xvfrintrz.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 @@ -200,21 +82,7 @@ define void @trunc_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: trunc_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2 -; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrz.d $vr2, $vr2 -; CHECK-NEXT: vextrins.d $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrz.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT: xvfrintrz.d $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -229,38 +97,8 @@ define void @roundeven_v8f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: roundeven_v8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrne.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 48 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0 -; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0 -; CHECK-NEXT: vfrintrne.s $vr3, $vr3 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrne.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2 -; CHECK-NEXT: xvst $xr3, $a0, 0 +; CHECK-NEXT: xvfrintrne.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 @@ -274,21 +112,7 @@ define void @roundeven_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: roundeven_v4f64: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2 -; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrne.d $vr2, $vr2 -; CHECK-NEXT: vextrins.d $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrne.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT: xvfrintrne.d $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll index a9a38e8..6ac7d51 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll @@ -106,6 +106,69 @@ define void @ctlz_v2i64(ptr %src, ptr %dst) nounwind { ret void } +define void @not_ctlz_v16i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vxori.b $vr0, $vr0, 255 +; CHECK-NEXT: vclz.b $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i8>, ptr %src + %neg = xor <16 x i8> %v, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %neg, i1 false) + store <16 x i8> %res, ptr %dst + ret void +} + +define void @not_ctlz_v8i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vclz.h $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i16>, ptr %src + %neg = xor <8 x i16> %v, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %neg, i1 false) + store <8 x i16> %res, ptr %dst + ret void +} + +define void @not_ctlz_v4i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vclz.w $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i32>, ptr %src + %neg = xor <4 x i32> %v, <i32 -1, i32 -1, i32 -1, i32 -1> + %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %neg, i1 false) + store <4 x i32> %res, ptr %dst + ret void +} + +define void @not_ctlz_v2i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vclz.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <2 x i64>, ptr %src + %neg = xor <2 x i64> %v, <i64 -1, i64 -1> + %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %neg, i1 false) + store <2 x i64> %res, ptr %dst + ret void +} + declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll index 1ca6290..cb01ac0 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll @@ -7,22 +7,8 @@ 
define void @ceil_v4f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: ceil_v4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrp.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrp.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr2, $vr0, 48 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0 @@ -36,13 +22,7 @@ define void @ceil_v2f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: ceil_v2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.d $vr1, $vr1 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrp.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -57,22 +37,8 @@ define void @floor_v4f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: floor_v4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrm.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrm.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr2, $vr0, 48 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0 @@ -86,13 +52,7 @@ define void @floor_v2f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: floor_v2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.d $vr1, $vr1 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrm.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -107,22 +67,8 @@ define void @trunc_v4f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: trunc_v4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrz.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrz.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr2, $vr0, 48 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x 
float>, ptr %a0 @@ -136,13 +82,7 @@ define void @trunc_v2f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: trunc_v2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.d $vr1, $vr1 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrz.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -157,22 +97,8 @@ define void @roundeven_v4f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: roundeven_v4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrne.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrne.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr2, $vr0, 48 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0 @@ -186,13 +112,7 @@ define void @roundeven_v2f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: roundeven_v2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.d $vr1, $vr1 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrne.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll new file mode 100644 index 0000000..9a806a1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll @@ -0,0 +1,758 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx --verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefix=LA32 %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefix=LA64 %s + +%struct.S = type { i64, i64, i8 } +%struct.F = type { float, double, float } +%struct.V = type { <4 x i32>, <4 x i32>, <16 x i16> } + +define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { +; LA32-LABEL: sink_fold_i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -48 +; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill +; LA32-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill +; LA32-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $s0, $a3 +; LA32-NEXT: move $s1, $a2 +; LA32-NEXT: slli.w $a1, $a0, 4 +; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 +; LA32-NEXT: add.w $a0, $a4, $a0 +; LA32-NEXT: sltui $a1, $a3, 1 +; LA32-NEXT: slti $a2, $a3, 0 +; LA32-NEXT: masknez $a2, $a2, $a1 +; LA32-NEXT: sltui $a3, $s1, 1 +; 
LA32-NEXT: maskeqz $a1, $a3, $a1 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: addi.w $s2, $a0, 8 +; LA32-NEXT: bnez $a1, .LBB0_3 +; LA32-NEXT: # %bb.1: # %for.body.preheader +; LA32-NEXT: move $fp, $a4 +; LA32-NEXT: move $s4, $zero +; LA32-NEXT: move $s5, $zero +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s6, $zero +; LA32-NEXT: .p2align 4, , 16 +; LA32-NEXT: .LBB0_2: # %for.body +; LA32-NEXT: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl f +; LA32-NEXT: ld.w $a0, $s2, 4 +; LA32-NEXT: ld.w $a1, $s2, 0 +; LA32-NEXT: add.w $a0, $a0, $s6 +; LA32-NEXT: add.w $s3, $a1, $s3 +; LA32-NEXT: sltu $a1, $s3, $a1 +; LA32-NEXT: addi.w $s4, $s4, 1 +; LA32-NEXT: sltui $a2, $s4, 1 +; LA32-NEXT: add.w $s5, $s5, $a2 +; LA32-NEXT: xor $a2, $s4, $s1 +; LA32-NEXT: xor $a3, $s5, $s0 +; LA32-NEXT: or $a2, $a2, $a3 +; LA32-NEXT: add.w $s6, $a0, $a1 +; LA32-NEXT: bnez $a2, .LBB0_2 +; LA32-NEXT: b .LBB0_4 +; LA32-NEXT: .LBB0_3: +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s6, $zero +; LA32-NEXT: .LBB0_4: # %for.cond.cleanup +; LA32-NEXT: st.w $s3, $s2, 0 +; LA32-NEXT: st.w $s6, $s2, 4 +; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 48 +; LA32-NEXT: ret +; +; LA64-LABEL: sink_fold_i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -48 +; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: st.d $s2, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $s0, $a1 +; LA64-NEXT: slli.d $a1, $a0, 4 +; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 +; LA64-NEXT: add.d $a0, $a2, $a0 +; LA64-NEXT: addi.d $s1, $a0, 8 +; LA64-NEXT: blez $s0, .LBB0_3 +; LA64-NEXT: # %bb.1: # %for.body.preheader +; LA64-NEXT: move $fp, $a2 +; LA64-NEXT: move $s2, $zero +; LA64-NEXT: .p2align 4, , 16 +; LA64-NEXT: .LBB0_2: # %for.body +; LA64-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $a0, $s1, 0 +; LA64-NEXT: addi.d $s0, $s0, -1 +; LA64-NEXT: add.d $s2, $a0, $s2 +; LA64-NEXT: bnez $s0, .LBB0_2 +; LA64-NEXT: b .LBB0_4 +; LA64-NEXT: .LBB0_3: +; LA64-NEXT: move $s2, $zero +; LA64-NEXT: .LBB0_4: # %for.cond.cleanup +; LA64-NEXT: st.d $s2, $s1, 0 +; LA64-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 48 +; LA64-NEXT: ret +entry: + %y = getelementptr inbounds %struct.S, ptr %a, i64 %k, i32 1 + %cmp4 = icmp sgt i64 %n, 0 + br i1 %cmp4, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %s.05 = phi i64 [ 0, %entry ], [ %add, %for.body ] + call void @f(ptr %a) + %0 = load i64, ptr %y + %add = add nsw i64 %0, %s.05 + %inc = add nuw 
nsw i64 %i.06, 1 + %exitcond.not = icmp eq i64 %inc, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %s.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ] + store i64 %s.0.lcssa, ptr %y + ret void +} + +define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { +; LA32-LABEL: sink_fold_f32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -48 +; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill +; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: move $s0, $a3 +; LA32-NEXT: move $s1, $a2 +; LA32-NEXT: slli.w $a1, $a0, 4 +; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 +; LA32-NEXT: add.w $a0, $a4, $a0 +; LA32-NEXT: sltui $a1, $a3, 1 +; LA32-NEXT: slti $a2, $a3, 0 +; LA32-NEXT: masknez $a2, $a2, $a1 +; LA32-NEXT: sltui $a3, $s1, 1 +; LA32-NEXT: maskeqz $a1, $a3, $a1 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: addi.w $s2, $a0, 16 +; LA32-NEXT: bnez $a1, .LBB1_3 +; LA32-NEXT: # %bb.1: # %for.body.preheader +; LA32-NEXT: move $fp, $a4 +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s4, $zero +; LA32-NEXT: movgr2fr.w $fs0, $zero +; LA32-NEXT: .p2align 4, , 16 +; LA32-NEXT: .LBB1_2: # %for.body +; LA32-NEXT: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl f +; LA32-NEXT: fld.s $fa0, $s2, 0 +; LA32-NEXT: addi.w $s3, $s3, 1 +; LA32-NEXT: sltui $a0, $s3, 1 +; LA32-NEXT: add.w $s4, $s4, $a0 +; LA32-NEXT: xor $a0, $s3, $s1 +; LA32-NEXT: xor $a1, $s4, $s0 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: fadd.s $fs0, $fa0, $fs0 +; LA32-NEXT: bnez $a0, .LBB1_2 +; LA32-NEXT: b .LBB1_4 +; LA32-NEXT: .LBB1_3: +; LA32-NEXT: movgr2fr.w $fs0, $zero +; LA32-NEXT: .LBB1_4: # %for.cond.cleanup +; LA32-NEXT: fst.s $fs0, $s2, 0 +; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload +; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 48 +; LA32-NEXT: ret +; +; LA64-LABEL: sink_fold_f32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -48 +; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $s0, $a1 +; LA64-NEXT: slli.d $a1, $a0, 4 +; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 +; LA64-NEXT: add.d $a0, $a2, $a0 +; LA64-NEXT: addi.d $s1, $a0, 16 +; LA64-NEXT: blez $s0, .LBB1_3 +; LA64-NEXT: # %bb.1: # %for.body.preheader +; LA64-NEXT: move $fp, $a2 +; LA64-NEXT: movgr2fr.w $fs0, $zero +; LA64-NEXT: .p2align 4, , 16 +; LA64-NEXT: .LBB1_2: # %for.body +; LA64-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: fld.s $fa0, $s1, 0 +; LA64-NEXT: addi.d $s0, $s0, -1 +; LA64-NEXT: fadd.s $fs0, 
$fa0, $fs0 +; LA64-NEXT: bnez $s0, .LBB1_2 +; LA64-NEXT: b .LBB1_4 +; LA64-NEXT: .LBB1_3: +; LA64-NEXT: movgr2fr.w $fs0, $zero +; LA64-NEXT: .LBB1_4: # %for.cond.cleanup +; LA64-NEXT: fst.s $fs0, $s1, 0 +; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 48 +; LA64-NEXT: ret +entry: + %y = getelementptr inbounds %struct.F, ptr %a, i64 %k, i32 2 + %cmp4 = icmp sgt i64 %n, 0 + br i1 %cmp4, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %s.05 = phi float [ 0.0, %entry ], [ %add, %for.body ] + call void @f(ptr %a) + %0 = load float, ptr %y + %add = fadd float %0, %s.05 + %inc = add nuw nsw i64 %i.06, 1 + %exitcond.not = icmp eq i64 %inc, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %s.0.lcssa = phi float [ 0.0, %entry ], [ %add, %for.body ] + store float %s.0.lcssa, ptr %y + ret void +} + +define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { +; LA32-LABEL: sink_fold_v4i32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -48 +; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill +; LA32-NEXT: move $s0, $a3 +; LA32-NEXT: move $s1, $a2 +; LA32-NEXT: slli.w $a0, $a0, 6 +; LA32-NEXT: add.w $a0, $a4, $a0 +; LA32-NEXT: sltui $a1, $a3, 1 +; LA32-NEXT: slti $a2, $a3, 0 +; LA32-NEXT: masknez $a2, $a2, $a1 +; LA32-NEXT: sltui $a3, $s1, 1 +; LA32-NEXT: maskeqz $a1, $a3, $a1 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: addi.w $s2, $a0, 16 +; LA32-NEXT: bnez $a1, .LBB2_3 +; LA32-NEXT: # %bb.1: # %for.body.preheader +; LA32-NEXT: move $fp, $a4 +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s4, $zero +; LA32-NEXT: vrepli.b $vr0, 0 +; LA32-NEXT: .p2align 4, , 16 +; LA32-NEXT: .LBB2_2: # %for.body +; LA32-NEXT: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl f +; LA32-NEXT: vld $vr0, $s2, 0 +; LA32-NEXT: addi.w $s3, $s3, 1 +; LA32-NEXT: sltui $a0, $s3, 1 +; LA32-NEXT: add.w $s4, $s4, $a0 +; LA32-NEXT: xor $a0, $s3, $s1 +; LA32-NEXT: xor $a1, $s4, $s0 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: vadd.w $vr1, $vr0, $vr1 +; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: bnez $a0, .LBB2_2 +; LA32-NEXT: b .LBB2_4 +; LA32-NEXT: .LBB2_3: +; LA32-NEXT: vrepli.b $vr0, 0 +; LA32-NEXT: .LBB2_4: # %for.cond.cleanup +; LA32-NEXT: vst $vr0, $s2, 0 +; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 48 +; 
LA32-NEXT: ret +; +; LA64-LABEL: sink_fold_v4i32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -48 +; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: slli.d $a0, $a0, 6 +; LA64-NEXT: add.d $a0, $a2, $a0 +; LA64-NEXT: addi.d $s1, $a0, 16 +; LA64-NEXT: blez $a1, .LBB2_3 +; LA64-NEXT: # %bb.1: # %for.body.preheader +; LA64-NEXT: move $fp, $a2 +; LA64-NEXT: move $s0, $a1 +; LA64-NEXT: vrepli.b $vr0, 0 +; LA64-NEXT: .p2align 4, , 16 +; LA64-NEXT: .LBB2_2: # %for.body +; LA64-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: vld $vr0, $s1, 0 +; LA64-NEXT: addi.d $s0, $s0, -1 +; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: vadd.w $vr1, $vr0, $vr1 +; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: bnez $s0, .LBB2_2 +; LA64-NEXT: b .LBB2_4 +; LA64-NEXT: .LBB2_3: +; LA64-NEXT: vrepli.b $vr0, 0 +; LA64-NEXT: .LBB2_4: # %for.cond.cleanup +; LA64-NEXT: vst $vr0, $s1, 0 +; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 48 +; LA64-NEXT: ret +entry: + %y = getelementptr inbounds %struct.V, ptr %a, i64 %k, i32 1 + %cmp = icmp sgt i64 %n, 0 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %sum.0 = phi <4 x i32> [ zeroinitializer, %entry ], [ %addv, %for.body ] + call void @f(ptr %a) + %v = load <4 x i32>, ptr %y + %addv = add <4 x i32> %v, %sum.0 + %inc = add nuw nsw i64 %i.0, 1 + %exitcond = icmp eq i64 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %sum.lcssa = phi <4 x i32> [ zeroinitializer, %entry ], [ %addv, %for.body ] + store <4 x i32> %sum.lcssa, ptr %y + ret void +} + +define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { +; LA32-LABEL: sink_fold_v16i16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -80 +; LA32-NEXT: st.w $ra, $sp, 76 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 72 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 68 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 64 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 60 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 56 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 52 # 4-byte Folded Spill +; LA32-NEXT: move $s0, $a3 +; LA32-NEXT: move $s1, $a2 +; LA32-NEXT: slli.w $a0, $a0, 6 +; LA32-NEXT: add.w $a0, $a4, $a0 +; LA32-NEXT: sltui $a1, $a3, 1 +; LA32-NEXT: slti $a2, $a3, 0 +; LA32-NEXT: masknez $a2, $a2, $a1 +; LA32-NEXT: sltui $a3, $s1, 1 +; LA32-NEXT: maskeqz $a1, $a3, $a1 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: addi.w $s2, $a0, 32 +; LA32-NEXT: bnez $a1, .LBB3_3 +; LA32-NEXT: # %bb.1: # %for.body.preheader +; LA32-NEXT: move $fp, $a4 +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s4, $zero +; LA32-NEXT: xvrepli.b $xr0, 0 +; LA32-NEXT: .p2align 4, , 16 +; LA32-NEXT: .LBB3_2: # %for.body +; LA32-NEXT: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte 
Folded Spill +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl f +; LA32-NEXT: xvld $xr0, $s2, 0 +; LA32-NEXT: addi.w $s3, $s3, 1 +; LA32-NEXT: sltui $a0, $s3, 1 +; LA32-NEXT: add.w $s4, $s4, $a0 +; LA32-NEXT: xor $a0, $s3, $s1 +; LA32-NEXT: xor $a1, $s4, $s0 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload +; LA32-NEXT: xvadd.h $xr1, $xr0, $xr1 +; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload +; LA32-NEXT: bnez $a0, .LBB3_2 +; LA32-NEXT: b .LBB3_4 +; LA32-NEXT: .LBB3_3: +; LA32-NEXT: xvrepli.b $xr0, 0 +; LA32-NEXT: .LBB3_4: # %for.cond.cleanup +; LA32-NEXT: xvst $xr0, $s2, 0 +; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 64 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 68 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 72 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 76 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 80 +; LA32-NEXT: ret +; +; LA64-LABEL: sink_fold_v16i16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -80 +; LA64-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64-NEXT: slli.d $a0, $a0, 6 +; LA64-NEXT: add.d $a0, $a2, $a0 +; LA64-NEXT: addi.d $s1, $a0, 32 +; LA64-NEXT: blez $a1, .LBB3_3 +; LA64-NEXT: # %bb.1: # %for.body.preheader +; LA64-NEXT: move $fp, $a2 +; LA64-NEXT: move $s0, $a1 +; LA64-NEXT: xvrepli.b $xr0, 0 +; LA64-NEXT: .p2align 4, , 16 +; LA64-NEXT: .LBB3_2: # %for.body +; LA64-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: xvld $xr0, $s1, 0 +; LA64-NEXT: addi.d $s0, $s0, -1 +; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload +; LA64-NEXT: xvadd.h $xr1, $xr0, $xr1 +; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload +; LA64-NEXT: bnez $s0, .LBB3_2 +; LA64-NEXT: b .LBB3_4 +; LA64-NEXT: .LBB3_3: +; LA64-NEXT: xvrepli.b $xr0, 0 +; LA64-NEXT: .LBB3_4: # %for.cond.cleanup +; LA64-NEXT: xvst $xr0, $s1, 0 +; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 80 +; LA64-NEXT: ret +entry: + %y = getelementptr inbounds %struct.V, ptr %a, i64 %k, i32 2 + %cmp = icmp sgt i64 %n, 0 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %sum.0 = phi <16 x i16> [ zeroinitializer, %entry ], [ %addv, %for.body ] + call void @f(ptr %a) + %v = load <16 x i16>, ptr %y + %addv = add <16 x i16> %v, %sum.0 + %inc = add nuw nsw i64 %i.0, 1 + %exitcond = icmp eq i64 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %sum.lcssa = phi <16 x i16> [ zeroinitializer, %entry ], [ %addv, %for.body ] + store <16 x i16> %sum.lcssa, ptr %y + ret void +} + +define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { +; LA32-LABEL: sink_fold_extracti8: +; LA32: # %bb.0: # 
%entry +; LA32-NEXT: addi.w $sp, $sp, -48 +; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill +; LA32-NEXT: move $s0, $a3 +; LA32-NEXT: move $s1, $a2 +; LA32-NEXT: slli.w $a1, $a0, 4 +; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 +; LA32-NEXT: add.w $a0, $a4, $a0 +; LA32-NEXT: sltui $a1, $a3, 1 +; LA32-NEXT: slti $a2, $a3, 0 +; LA32-NEXT: masknez $a2, $a2, $a1 +; LA32-NEXT: sltui $a3, $s1, 1 +; LA32-NEXT: maskeqz $a1, $a3, $a1 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: addi.w $s2, $a0, 16 +; LA32-NEXT: bnez $a1, .LBB4_3 +; LA32-NEXT: # %bb.1: # %for.body.preheader +; LA32-NEXT: move $fp, $a4 +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s4, $zero +; LA32-NEXT: vrepli.b $vr0, 0 +; LA32-NEXT: .p2align 4, , 16 +; LA32-NEXT: .LBB4_2: # %for.body +; LA32-NEXT: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl f +; LA32-NEXT: vldrepl.b $vr0, $s2, 0 +; LA32-NEXT: addi.w $s3, $s3, 1 +; LA32-NEXT: sltui $a0, $s3, 1 +; LA32-NEXT: add.w $s4, $s4, $a0 +; LA32-NEXT: xor $a0, $s3, $s1 +; LA32-NEXT: xor $a1, $s4, $s0 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: vadd.b $vr1, $vr0, $vr1 +; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: bnez $a0, .LBB4_2 +; LA32-NEXT: b .LBB4_4 +; LA32-NEXT: .LBB4_3: +; LA32-NEXT: vrepli.b $vr0, 0 +; LA32-NEXT: .LBB4_4: # %for.cond.cleanup +; LA32-NEXT: vstelm.b $vr0, $s2, 0, 1 +; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 48 +; LA32-NEXT: ret +; +; LA64-LABEL: sink_fold_extracti8: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -48 +; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: move $s0, $a1 +; LA64-NEXT: slli.d $a1, $a0, 4 +; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 +; LA64-NEXT: add.d $a0, $a2, $a0 +; LA64-NEXT: addi.d $s1, $a0, 16 +; LA64-NEXT: blez $s0, .LBB4_3 +; LA64-NEXT: # %bb.1: # %for.body.preheader +; LA64-NEXT: move $fp, $a2 +; LA64-NEXT: vrepli.b $vr0, 0 +; LA64-NEXT: .p2align 4, , 16 +; LA64-NEXT: .LBB4_2: # %for.body +; LA64-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: vldrepl.b $vr0, $s1, 0 +; LA64-NEXT: addi.d $s0, $s0, -1 +; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: vadd.b $vr1, $vr0, $vr1 +; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: bnez $s0, .LBB4_2 +; LA64-NEXT: b .LBB4_4 +; LA64-NEXT: .LBB4_3: +; LA64-NEXT: vrepli.b 
$vr0, 0 +; LA64-NEXT: .LBB4_4: # %for.cond.cleanup +; LA64-NEXT: vstelm.b $vr0, $s1, 0, 1 +; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 48 +; LA64-NEXT: ret +entry: + %y = getelementptr inbounds %struct.S, ptr %a, i64 %k, i32 2 + %cmp = icmp sgt i64 %n, 0 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %sum.0 = phi <16 x i8> [ zeroinitializer, %entry ], [ %addv, %for.body ] + call void @f(ptr %a) + %e = load i8, ptr %y + %ins0 = insertelement <16 x i8> poison, i8 %e, i32 0 + %v = shufflevector <16 x i8> %ins0, <16 x i8> poison, <16 x i32> zeroinitializer + %addv = add <16 x i8> %v, %sum.0 + %inc = add nuw nsw i64 %i.0, 1 + %exitcond = icmp eq i64 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %sum.lcssa = phi <16 x i8> [ zeroinitializer, %entry ], [ %addv, %for.body ] + %res = extractelement <16 x i8> %sum.lcssa, i32 1 + store i8 %res, ptr %y + ret void +} + +define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { +; LA32-LABEL: sink_fold_extractf64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -80 +; LA32-NEXT: st.w $ra, $sp, 76 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 72 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 68 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 64 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 60 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 56 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 52 # 4-byte Folded Spill +; LA32-NEXT: move $s0, $a3 +; LA32-NEXT: move $s1, $a2 +; LA32-NEXT: slli.w $a1, $a0, 4 +; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 +; LA32-NEXT: add.w $a0, $a4, $a0 +; LA32-NEXT: sltui $a1, $a3, 1 +; LA32-NEXT: slti $a2, $a3, 0 +; LA32-NEXT: masknez $a2, $a2, $a1 +; LA32-NEXT: sltui $a3, $s1, 1 +; LA32-NEXT: maskeqz $a1, $a3, $a1 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: addi.w $s2, $a0, 8 +; LA32-NEXT: bnez $a1, .LBB5_3 +; LA32-NEXT: # %bb.1: # %for.body.preheader +; LA32-NEXT: move $fp, $a4 +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s4, $zero +; LA32-NEXT: xvrepli.b $xr0, 0 +; LA32-NEXT: .p2align 4, , 16 +; LA32-NEXT: .LBB5_2: # %for.body +; LA32-NEXT: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl f +; LA32-NEXT: xvldrepl.d $xr0, $s2, 0 +; LA32-NEXT: addi.w $s3, $s3, 1 +; LA32-NEXT: sltui $a0, $s3, 1 +; LA32-NEXT: add.w $s4, $s4, $a0 +; LA32-NEXT: xor $a0, $s3, $s1 +; LA32-NEXT: xor $a1, $s4, $s0 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload +; LA32-NEXT: xvfadd.d $xr1, $xr0, $xr1 +; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload +; LA32-NEXT: bnez $a0, .LBB5_2 +; LA32-NEXT: b .LBB5_4 +; LA32-NEXT: .LBB5_3: +; LA32-NEXT: xvrepli.b $xr0, 0 +; LA32-NEXT: .LBB5_4: # %for.cond.cleanup +; LA32-NEXT: xvstelm.d $xr0, $s2, 0, 1 +; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 64 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 68 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 72 # 
4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 76 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 80 +; LA32-NEXT: ret +; +; LA64-LABEL: sink_fold_extractf64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -80 +; LA64-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64-NEXT: move $s0, $a1 +; LA64-NEXT: slli.d $a1, $a0, 4 +; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 +; LA64-NEXT: add.d $a0, $a2, $a0 +; LA64-NEXT: addi.d $s1, $a0, 8 +; LA64-NEXT: blez $s0, .LBB5_3 +; LA64-NEXT: # %bb.1: # %for.body.preheader +; LA64-NEXT: move $fp, $a2 +; LA64-NEXT: xvrepli.b $xr0, 0 +; LA64-NEXT: .p2align 4, , 16 +; LA64-NEXT: .LBB5_2: # %for.body +; LA64-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: xvldrepl.d $xr0, $s1, 0 +; LA64-NEXT: addi.d $s0, $s0, -1 +; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload +; LA64-NEXT: xvfadd.d $xr1, $xr0, $xr1 +; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload +; LA64-NEXT: bnez $s0, .LBB5_2 +; LA64-NEXT: b .LBB5_4 +; LA64-NEXT: .LBB5_3: +; LA64-NEXT: xvrepli.b $xr0, 0 +; LA64-NEXT: .LBB5_4: # %for.cond.cleanup +; LA64-NEXT: xvstelm.d $xr0, $s1, 0, 1 +; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 80 +; LA64-NEXT: ret +entry: + %y = getelementptr inbounds %struct.F, ptr %a, i64 %k, i32 1 + %cmp = icmp sgt i64 %n, 0 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %sum.0 = phi <4 x double> [ zeroinitializer, %entry ], [ %addv, %for.body ] + call void @f(ptr %a) + %e = load double, ptr %y + %ins0 = insertelement <4 x double> poison, double %e, i32 0 + %v = shufflevector <4 x double> %ins0, <4 x double> poison, <4 x i32> zeroinitializer + %addv = fadd <4 x double> %v, %sum.0 + %inc = add nuw nsw i64 %i.0, 1 + %exitcond = icmp eq i64 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %sum.lcssa = phi <4 x double> [ zeroinitializer, %entry ], [ %addv, %for.body ] + %res = extractelement <4 x double> %sum.lcssa, i32 1 + store double %res, ptr %y + ret void +} + +declare void @f(ptr) diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt index d3c0da9..000c67ef 100644 --- a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt +++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt @@ -1439,11 +1439,8 @@ Key: PSUBWrm: [ 0.00 0.00 ] Key: PSUBWrr: [ 0.00 0.00 ] Key: PSWAPDrm: [ 0.00 0.00 ] Key: PSWAPDrr: [ 0.00 0.00 ] -Key: PT: [ 0.00 0.00 ] Key: PTCMMIMFP: [ 0.00 0.00 ] Key: PTCMMRLFP: [ 0.00 0.00 ] -Key: PTCONJTCMMIMFP: [ 0.00 0.00 ] -Key: PTCONJTFP: [ 0.00 0.00 ] Key: PTCVTROWD: [ 0.00 0.00 ] Key: PTCVTROWPS: [ 0.00 0.00 ] Key: PTDPBF: [ 0.00 0.00 ] @@ -1471,20 +1468,11 @@ Key: PTILEMOVROWrre: [ 0.00 0.00 ] Key: PTILEMOVROWrreV: [ 0.00 0.00 ] Key: PTILEMOVROWrri: [ 0.00 0.00 ] Key: PTILEMOVROWrriV: [ 
0.00 0.00 ] -Key: PTILEPAIRLOAD: [ 0.00 0.00 ] -Key: PTILEPAIRSTORE: [ 0.00 0.00 ] Key: PTILESTORED: [ 0.00 0.00 ] Key: PTILESTOREDV: [ 0.00 0.00 ] Key: PTILEZERO: [ 0.00 0.00 ] Key: PTILEZEROV: [ 0.00 0.00 ] Key: PTMMULTF: [ 0.00 0.00 ] -Key: PTTCMMIMFP: [ 0.00 0.00 ] -Key: PTTCMMRLFP: [ 0.00 0.00 ] -Key: PTTDPBF: [ 0.00 0.00 ] -Key: PTTDPFP: [ 0.00 0.00 ] -Key: PTTMMULTF: [ 0.00 0.00 ] -Key: PTTRANSPOSED: [ 0.00 0.00 ] -Key: PTTRANSPOSEDV: [ 0.00 0.00 ] Key: PTWRITE: [ 0.00 0.00 ] Key: PTWRITEm: [ 0.00 0.00 ] Key: PTWRITEr: [ 0.00 0.00 ] @@ -1717,8 +1705,6 @@ Key: TAILJMPm: [ 0.00 0.00 ] Key: TAILJMPr: [ 0.00 0.00 ] Key: TCMMIMFP: [ 0.00 0.00 ] Key: TCMMRLFP: [ 0.00 0.00 ] -Key: TCONJTCMMIMFP: [ 0.00 0.00 ] -Key: TCONJTFP: [ 0.00 0.00 ] Key: TCRETURN_HIPE: [ 0.00 0.00 ] Key: TCRETURN_WIN: [ 0.00 0.00 ] Key: TCRETURN_WINmi: [ 0.00 0.00 ] @@ -1764,12 +1750,6 @@ Key: TPAUSE: [ 0.00 0.00 ] Key: TRAP: [ 0.00 0.00 ] Key: TST_F: [ 0.00 0.00 ] Key: TST_Fp: [ 0.00 0.00 ] -Key: TTCMMIMFP: [ 0.00 0.00 ] -Key: TTCMMRLFP: [ 0.00 0.00 ] -Key: TTDPBF: [ 0.00 0.00 ] -Key: TTDPFP: [ 0.00 0.00 ] -Key: TTMMULTF: [ 0.00 0.00 ] -Key: TTRANSPOSED: [ 0.00 0.00 ] Key: TZCNT: [ 0.00 0.00 ] Key: TZMSK: [ 0.00 0.00 ] Key: UBSAN_UD: [ 0.00 0.00 ] @@ -7034,7 +7014,6 @@ Key: PhyReg_VR256: [ 0.00 0.00 ] Key: PhyReg_VR512: [ 0.00 0.00 ] Key: PhyReg_VR512_0_15: [ 0.00 0.00 ] Key: PhyReg_TILE: [ 0.00 0.00 ] -Key: PhyReg_TILEPAIR: [ 0.00 0.00 ] Key: VirtReg_GR8: [ 0.00 0.00 ] Key: VirtReg_GRH8: [ 0.00 0.00 ] Key: VirtReg_GR8_NOREX2: [ 0.00 0.00 ] @@ -7170,4 +7149,3 @@ Key: VirtReg_VR256: [ 0.00 0.00 ] Key: VirtReg_VR512: [ 0.00 0.00 ] Key: VirtReg_VR512_0_15: [ 0.00 0.00 ] Key: VirtReg_TILE: [ 0.00 0.00 ] -Key: VirtReg_TILEPAIR: [ 0.00 0.00 ] diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt index c6e5508..bb72886 100644 --- a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt +++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt @@ -1439,11 +1439,8 @@ Key: PSUBWrm: [ 0.00 0.00 ] Key: PSUBWrr: [ 0.00 0.00 ] Key: PSWAPDrm: [ 0.00 0.00 ] Key: PSWAPDrr: [ 0.00 0.00 ] -Key: PT: [ 0.00 0.00 ] Key: PTCMMIMFP: [ 0.00 0.00 ] Key: PTCMMRLFP: [ 0.00 0.00 ] -Key: PTCONJTCMMIMFP: [ 0.00 0.00 ] -Key: PTCONJTFP: [ 0.00 0.00 ] Key: PTCVTROWD: [ 0.00 0.00 ] Key: PTCVTROWPS: [ 0.00 0.00 ] Key: PTDPBF: [ 0.00 0.00 ] @@ -1471,20 +1468,11 @@ Key: PTILEMOVROWrre: [ 0.00 0.00 ] Key: PTILEMOVROWrreV: [ 0.00 0.00 ] Key: PTILEMOVROWrri: [ 0.00 0.00 ] Key: PTILEMOVROWrriV: [ 0.00 0.00 ] -Key: PTILEPAIRLOAD: [ 0.00 0.00 ] -Key: PTILEPAIRSTORE: [ 0.00 0.00 ] Key: PTILESTORED: [ 0.00 0.00 ] Key: PTILESTOREDV: [ 0.00 0.00 ] Key: PTILEZERO: [ 0.00 0.00 ] Key: PTILEZEROV: [ 0.00 0.00 ] Key: PTMMULTF: [ 0.00 0.00 ] -Key: PTTCMMIMFP: [ 0.00 0.00 ] -Key: PTTCMMRLFP: [ 0.00 0.00 ] -Key: PTTDPBF: [ 0.00 0.00 ] -Key: PTTDPFP: [ 0.00 0.00 ] -Key: PTTMMULTF: [ 0.00 0.00 ] -Key: PTTRANSPOSED: [ 0.00 0.00 ] -Key: PTTRANSPOSEDV: [ 0.00 0.00 ] Key: PTWRITE: [ 0.00 0.00 ] Key: PTWRITEm: [ 0.00 0.00 ] Key: PTWRITEr: [ 0.00 0.00 ] @@ -1717,8 +1705,6 @@ Key: TAILJMPm: [ 0.00 0.00 ] Key: TAILJMPr: [ 0.00 0.00 ] Key: TCMMIMFP: [ 0.00 0.00 ] Key: TCMMRLFP: [ 0.00 0.00 ] -Key: TCONJTCMMIMFP: [ 0.00 0.00 ] -Key: TCONJTFP: [ 0.00 0.00 ] Key: TCRETURN_HIPE: [ 0.00 0.00 ] Key: TCRETURN_WIN: [ 0.00 0.00 ] Key: TCRETURN_WINmi: [ 0.00 0.00 ] @@ -1764,12 +1750,6 @@ Key: TPAUSE: [ 0.00 0.00 ] Key: TRAP: [ 0.00 0.00 ] Key: TST_F: [ 0.00 
0.00 ] Key: TST_Fp: [ 0.00 0.00 ] -Key: TTCMMIMFP: [ 0.00 0.00 ] -Key: TTCMMRLFP: [ 0.00 0.00 ] -Key: TTDPBF: [ 0.00 0.00 ] -Key: TTDPFP: [ 0.00 0.00 ] -Key: TTMMULTF: [ 0.00 0.00 ] -Key: TTRANSPOSED: [ 0.00 0.00 ] Key: TZCNT: [ 0.00 0.00 ] Key: TZMSK: [ 0.00 0.00 ] Key: UBSAN_UD: [ 0.00 0.00 ] @@ -7034,7 +7014,6 @@ Key: PhyReg_VR256: [ 0.00 0.00 ] Key: PhyReg_VR512: [ 0.00 0.00 ] Key: PhyReg_VR512_0_15: [ 0.00 0.00 ] Key: PhyReg_TILE: [ 0.00 0.00 ] -Key: PhyReg_TILEPAIR: [ 0.00 0.00 ] Key: VirtReg_GR8: [ 0.00 0.00 ] Key: VirtReg_GRH8: [ 0.00 0.00 ] Key: VirtReg_GR8_NOREX2: [ 0.00 0.00 ] @@ -7170,4 +7149,3 @@ Key: VirtReg_VR256: [ 0.00 0.00 ] Key: VirtReg_VR512: [ 0.00 0.00 ] Key: VirtReg_VR512_0_15: [ 0.00 0.00 ] Key: VirtReg_TILE: [ 0.00 0.00 ] -Key: VirtReg_TILEPAIR: [ 0.00 0.00 ] diff --git a/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll b/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll index bd8d882..9dd402d 100644 --- a/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll +++ b/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll @@ -26,7 +26,7 @@ ; Also, the first eviction problem is significantly less than 300 instructions. Check ; that there is a zero value. ; Note: we're regex-ing some of the opcodes to avoid test flakyness. -; CHECK: instructions: 20,{{([0-9]{4})}},1{{([0-9]{3})}},2{{([0-9]{3})}},{{.*}},0, +; CHECK: instructions: 20,{{([0-9]{4})}},{{([0-9]{4})}},{{([0-9]{4})}},{{.*}},0, ; Only the candidate virtreg and the 10th LR are included in this problem. Make ; sure the other LRs have values of zero. There are 2700 0s followed by some 1s. ; There's a limit to how many repetitions can be matched. diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-1cta.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-1cta.ll index b5c43fd2..d653895 100644 --- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-1cta.ll +++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-1cta.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK-PTX64 %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK-PTX64 %s ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_100a %} +; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %} +; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %} target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-2cta.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-2cta.ll index 57342dc..5de1ac8 100644 --- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-2cta.ll +++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-2cta.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s 
-mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK-PTX64 %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK-PTX64 %s ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_100a %} +; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %} +; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %} target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-gather4.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-gather4.ll index 6296d5a..2f5c1ef 100644 --- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-gather4.ll +++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-gather4.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK-PTX64 %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK-PTX64 %s ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_100a %} +; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %} +; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %} target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw.ll index e5ae387..a2b2c2f 100644 --- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw.ll +++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK-PTX64 %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK-PTX64 %s ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 
--nvptx-short-ptr| %ptxas-verify -arch=sm_100a %} +; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %} +; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %} target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw128.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw128.ll index 7d04ada..e4c48dd 100644 --- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw128.ll +++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw128.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK-PTX64 %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK-PTX64 %s ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_100a %} +; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %} +; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %} target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s.ll index b0fe77c..727bb3b 100644 --- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s.ll +++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK-PTX64 %s ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK-PTX64 %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK-PTX64 %s ; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %} ; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %} +; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %} +; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %} target triple = "nvptx64-nvidia-cuda" @@ -29,10 +33,10 @@ define void @cp_async_bulk_tensor_g2s_tile_1d(ptr addrspace(7) %d, ptr addrspace ; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_tile_1d_param_1]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd3, [cp_async_bulk_tensor_g2s_tile_1d_param_2]; ; CHECK-PTX64-NEXT: ld.param.b32 %r1, [cp_async_bulk_tensor_g2s_tile_1d_param_3]; 
-; CHECK-PTX64-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1}], [%rd2]; +; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_1d_param_4]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_tile_1d_param_5]; +; CHECK-PTX64-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1}], [%rd2]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1}], [%rd2], %rd4; -; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_1d_param_4]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1}], [%rd2], %rs1; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1}], [%rd2], %rs1, %rd4; ; CHECK-PTX64-NEXT: ret; @@ -48,10 +52,10 @@ define void @cp_async_bulk_tensor_g2s_tile_1d(ptr addrspace(7) %d, ptr addrspace ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_g2s_tile_1d_param_1]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [cp_async_bulk_tensor_g2s_tile_1d_param_2]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_tile_1d_param_3]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3}], [%r2]; +; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_1d_param_4]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_tile_1d_param_5]; +; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3}], [%r2]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3}], [%r2], %rd2; -; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_1d_param_4]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3}], [%r2], %rs1; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3}], [%r2], %rs1, %rd2; ; CHECK-PTX-SHARED32-NEXT: ret; @@ -79,10 +83,10 @@ define void @cp_async_bulk_tensor_g2s_tile_2d(ptr addrspace(7) %d, ptr addrspace ; CHECK-PTX64-NEXT: ld.param.b64 %rd3, [cp_async_bulk_tensor_g2s_tile_2d_param_2]; ; CHECK-PTX64-NEXT: ld.param.b32 %r1, [cp_async_bulk_tensor_g2s_tile_2d_param_3]; ; CHECK-PTX64-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_g2s_tile_2d_param_4]; -; CHECK-PTX64-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2}], [%rd2]; +; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_2d_param_5]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_tile_2d_param_6]; +; CHECK-PTX64-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2}], [%rd2]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2}], [%rd2], %rd4; -; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_2d_param_5]; ; CHECK-PTX64-NEXT: 
cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2}], [%rd2], %rs1; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2}], [%rd2], %rs1, %rd4; ; CHECK-PTX64-NEXT: ret; @@ -99,10 +103,10 @@ define void @cp_async_bulk_tensor_g2s_tile_2d(ptr addrspace(7) %d, ptr addrspace ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [cp_async_bulk_tensor_g2s_tile_2d_param_2]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_tile_2d_param_3]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_tile_2d_param_4]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4}], [%r2]; +; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_2d_param_5]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_tile_2d_param_6]; +; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4}], [%r2]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4}], [%r2], %rd2; -; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_2d_param_5]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4}], [%r2], %rs1; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4}], [%r2], %rs1, %rd2; ; CHECK-PTX-SHARED32-NEXT: ret; @@ -131,10 +135,10 @@ define void @cp_async_bulk_tensor_g2s_tile_3d(ptr addrspace(7) %d, ptr addrspace ; CHECK-PTX64-NEXT: ld.param.b32 %r1, [cp_async_bulk_tensor_g2s_tile_3d_param_3]; ; CHECK-PTX64-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_g2s_tile_3d_param_4]; ; CHECK-PTX64-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_tile_3d_param_5]; -; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2]; +; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_3d_param_6]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_tile_3d_param_7]; +; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], %rd4; -; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_3d_param_6]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], %rs1; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], %rs1, %rd4; ; CHECK-PTX64-NEXT: ret; @@ -152,10 +156,10 @@ define void @cp_async_bulk_tensor_g2s_tile_3d(ptr addrspace(7) %d, ptr addrspace ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_tile_3d_param_3]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_tile_3d_param_4]; ; 
CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_g2s_tile_3d_param_5]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5}], [%r2]; +; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_3d_param_6]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_tile_3d_param_7]; +; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5}], [%r2]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], %rd2; -; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_3d_param_6]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], %rs1; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], %rs1, %rd2; ; CHECK-PTX-SHARED32-NEXT: ret; @@ -185,10 +189,10 @@ define void @cp_async_bulk_tensor_g2s_tile_4d(ptr addrspace(7) %d, ptr addrspace ; CHECK-PTX64-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_g2s_tile_4d_param_4]; ; CHECK-PTX64-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_tile_4d_param_5]; ; CHECK-PTX64-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_tile_4d_param_6]; -; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2]; +; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_4d_param_7]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_tile_4d_param_8]; +; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], %rd4; -; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_4d_param_7]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], %rs1; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], %rs1, %rd4; ; CHECK-PTX64-NEXT: ret; @@ -207,10 +211,10 @@ define void @cp_async_bulk_tensor_g2s_tile_4d(ptr addrspace(7) %d, ptr addrspace ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_tile_4d_param_4]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_g2s_tile_4d_param_5]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r6, [cp_async_bulk_tensor_g2s_tile_4d_param_6]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2]; +; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_4d_param_7]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_tile_4d_param_8]; +; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2]; ; 
CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], %rd2; -; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_4d_param_7]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], %rs1; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], %rs1, %rd2; ; CHECK-PTX-SHARED32-NEXT: ret; @@ -241,10 +245,10 @@ define void @cp_async_bulk_tensor_g2s_tile_5d(ptr addrspace(7) %d, ptr addrspace ; CHECK-PTX64-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_tile_5d_param_5]; ; CHECK-PTX64-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_tile_5d_param_6]; ; CHECK-PTX64-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_g2s_tile_5d_param_7]; -; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2]; +; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_5d_param_8]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_tile_5d_param_9]; +; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], %rd4; -; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_5d_param_8]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], %rs1; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], %rs1, %rd4; ; CHECK-PTX64-NEXT: ret; @@ -264,10 +268,10 @@ define void @cp_async_bulk_tensor_g2s_tile_5d(ptr addrspace(7) %d, ptr addrspace ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_g2s_tile_5d_param_5]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r6, [cp_async_bulk_tensor_g2s_tile_5d_param_6]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r7, [cp_async_bulk_tensor_g2s_tile_5d_param_7]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2]; +; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_5d_param_8]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_tile_5d_param_9]; +; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], %rd2; -; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_5d_param_8]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], %rs1; ; CHECK-PTX-SHARED32-NEXT: 
cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], %rs1, %rd2; ; CHECK-PTX-SHARED32-NEXT: ret; @@ -297,10 +301,10 @@ define void @cp_async_bulk_tensor_g2s_im2col_3d(ptr addrspace(7) %d, ptr addrspa ; CHECK-PTX64-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_g2s_im2col_3d_param_4]; ; CHECK-PTX64-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_im2col_3d_param_5]; ; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_im2col_3d_param_6]; -; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], {%rs1}; +; CHECK-PTX64-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_3d_param_7]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_im2col_3d_param_8]; +; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], {%rs1}; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], {%rs1}, %rd4; -; CHECK-PTX64-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_3d_param_7]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], {%rs1}, %rs2; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], {%rs1}, %rs2, %rd4; ; CHECK-PTX64-NEXT: ret; @@ -319,10 +323,10 @@ define void @cp_async_bulk_tensor_g2s_im2col_3d(ptr addrspace(7) %d, ptr addrspa ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_im2col_3d_param_4]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_g2s_im2col_3d_param_5]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_im2col_3d_param_6]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], {%rs1}; +; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_3d_param_7]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_im2col_3d_param_8]; +; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], {%rs1}; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], {%rs1}, %rd2; -; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_3d_param_7]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], {%rs1}, %rs2; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], {%rs1}, %rs2, %rd2; ; CHECK-PTX-SHARED32-NEXT: ret; @@ -354,10 +358,10 @@ define void @cp_async_bulk_tensor_g2s_im2col_4d(ptr addrspace(7) %d, ptr addrspa ; CHECK-PTX64-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_im2col_4d_param_6]; ; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_im2col_4d_param_7]; ; 
CHECK-PTX64-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_4d_param_8]; -; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], {%rs1, %rs2}; +; CHECK-PTX64-NEXT: ld.param.b16 %rs3, [cp_async_bulk_tensor_g2s_im2col_4d_param_9]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_im2col_4d_param_10]; +; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], {%rs1, %rs2}; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], {%rs1, %rs2}, %rd4; -; CHECK-PTX64-NEXT: ld.param.b16 %rs3, [cp_async_bulk_tensor_g2s_im2col_4d_param_9]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], {%rs1, %rs2}, %rs3; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], {%rs1, %rs2}, %rs3, %rd4; ; CHECK-PTX64-NEXT: ret; @@ -378,10 +382,10 @@ define void @cp_async_bulk_tensor_g2s_im2col_4d(ptr addrspace(7) %d, ptr addrspa ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r6, [cp_async_bulk_tensor_g2s_im2col_4d_param_6]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_im2col_4d_param_7]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_4d_param_8]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], {%rs1, %rs2}; +; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs3, [cp_async_bulk_tensor_g2s_im2col_4d_param_9]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_im2col_4d_param_10]; +; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], {%rs1, %rs2}; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], {%rs1, %rs2}, %rd2; -; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs3, [cp_async_bulk_tensor_g2s_im2col_4d_param_9]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], {%rs1, %rs2}, %rs3; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], {%rs1, %rs2}, %rs3, %rd2; ; CHECK-PTX-SHARED32-NEXT: ret; @@ -415,10 +419,10 @@ define void @cp_async_bulk_tensor_g2s_im2col_5d(ptr addrspace(7) %d, ptr addrspa ; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_im2col_5d_param_8]; ; CHECK-PTX64-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_5d_param_9]; ; CHECK-PTX64-NEXT: ld.param.b16 %rs3, [cp_async_bulk_tensor_g2s_im2col_5d_param_10]; -; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], {%rs1, %rs2, %rs3}; +; CHECK-PTX64-NEXT: ld.param.b16 %rs4, [cp_async_bulk_tensor_g2s_im2col_5d_param_11]; ; CHECK-PTX64-NEXT: ld.param.b64 
%rd4, [cp_async_bulk_tensor_g2s_im2col_5d_param_12]; +; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], {%rs1, %rs2, %rs3}; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], {%rs1, %rs2, %rs3}, %rd4; -; CHECK-PTX64-NEXT: ld.param.b16 %rs4, [cp_async_bulk_tensor_g2s_im2col_5d_param_11]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], {%rs1, %rs2, %rs3}, %rs4; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], {%rs1, %rs2, %rs3}, %rs4, %rd4; ; CHECK-PTX64-NEXT: ret; @@ -441,10 +445,10 @@ define void @cp_async_bulk_tensor_g2s_im2col_5d(ptr addrspace(7) %d, ptr addrspa ; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_im2col_5d_param_8]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_5d_param_9]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs3, [cp_async_bulk_tensor_g2s_im2col_5d_param_10]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], {%rs1, %rs2, %rs3}; +; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs4, [cp_async_bulk_tensor_g2s_im2col_5d_param_11]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_im2col_5d_param_12]; +; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], {%rs1, %rs2, %rs3}; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], {%rs1, %rs2, %rs3}, %rd2; -; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs4, [cp_async_bulk_tensor_g2s_im2col_5d_param_11]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], {%rs1, %rs2, %rs3}, %rs4; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], {%rs1, %rs2, %rs3}, %rs4, %rd2; ; CHECK-PTX-SHARED32-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/f16-ex2.ll b/llvm/test/CodeGen/NVPTX/f16-ex2.ll index ee79f9d..af3fe67 100644 --- a/llvm/test/CodeGen/NVPTX/f16-ex2.ll +++ b/llvm/test/CodeGen/NVPTX/f16-ex2.ll @@ -1,12 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mcpu=sm_75 -mattr=+ptx70 | FileCheck --check-prefixes=CHECK-FP16 %s -; RUN: %if ptxas-sm_75 && ptxas-isa-7.0 %{ llc < %s -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %} +; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx78 | FileCheck --check-prefixes=CHECK-FP16 %s +; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc < %s -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %} target triple = "nvptx64-nvidia-cuda" declare half @llvm.nvvm.ex2.approx.f16(half) -declare <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half>) +declare <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half>) +declare 
bfloat @llvm.nvvm.ex2.approx.ftz.bf16(bfloat) +declare <2 x bfloat> @llvm.nvvm.ex2.approx.ftz.v2bf16(<2 x bfloat>) -; CHECK-LABEL: ex2_half define half @ex2_half(half %0) { ; CHECK-FP16-LABEL: ex2_half( ; CHECK-FP16: { @@ -21,7 +22,6 @@ define half @ex2_half(half %0) { ret half %res } -; CHECK-LABEL: ex2_2xhalf define <2 x half> @ex2_2xhalf(<2 x half> %0) { ; CHECK-FP16-LABEL: ex2_2xhalf( ; CHECK-FP16: { @@ -32,6 +32,34 @@ define <2 x half> @ex2_2xhalf(<2 x half> %0) { ; CHECK-FP16-NEXT: ex2.approx.f16x2 %r2, %r1; ; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-FP16-NEXT: ret; - %res = call <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half> %0) + %res = call <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half> %0) ret <2 x half> %res } + +define bfloat @ex2_bfloat(bfloat %0) { +; CHECK-FP16-LABEL: ex2_bfloat( +; CHECK-FP16: { +; CHECK-FP16-NEXT: .reg .b16 %rs<3>; +; CHECK-FP16-EMPTY: +; CHECK-FP16-NEXT: // %bb.0: +; CHECK-FP16-NEXT: ld.param.b16 %rs1, [ex2_bfloat_param_0]; +; CHECK-FP16-NEXT: ex2.approx.ftz.bf16 %rs2, %rs1; +; CHECK-FP16-NEXT: st.param.b16 [func_retval0], %rs2; +; CHECK-FP16-NEXT: ret; + %res = call bfloat @llvm.nvvm.ex2.approx.ftz.bf16(bfloat %0) + ret bfloat %res +} + +define <2 x bfloat> @ex2_2xbfloat(<2 x bfloat> %0) { +; CHECK-FP16-LABEL: ex2_2xbfloat( +; CHECK-FP16: { +; CHECK-FP16-NEXT: .reg .b32 %r<3>; +; CHECK-FP16-EMPTY: +; CHECK-FP16-NEXT: // %bb.0: +; CHECK-FP16-NEXT: ld.param.b32 %r1, [ex2_2xbfloat_param_0]; +; CHECK-FP16-NEXT: ex2.approx.ftz.bf16x2 %r2, %r1; +; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-FP16-NEXT: ret; + %res = call <2 x bfloat> @llvm.nvvm.ex2.approx.ftz.v2bf16(<2 x bfloat> %0) + ret <2 x bfloat> %res +} diff --git a/llvm/test/CodeGen/NVPTX/f32-ex2.ll b/llvm/test/CodeGen/NVPTX/f32-ex2.ll index 796d80d..97b9d35 100644 --- a/llvm/test/CodeGen/NVPTX/f32-ex2.ll +++ b/llvm/test/CodeGen/NVPTX/f32-ex2.ll @@ -3,7 +3,8 @@ ; RUN: %if ptxas-sm_50 && ptxas-isa-3.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_50 -mattr=+ptx32 | %ptxas-verify -arch=sm_50 %} target triple = "nvptx-nvidia-cuda" -declare float @llvm.nvvm.ex2.approx.f(float) +declare float @llvm.nvvm.ex2.approx.f32(float) +declare float @llvm.nvvm.ex2.approx.ftz.f32(float) ; CHECK-LABEL: ex2_float define float @ex2_float(float %0) { @@ -16,7 +17,7 @@ define float @ex2_float(float %0) { ; CHECK-NEXT: ex2.approx.f32 %r2, %r1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; - %res = call float @llvm.nvvm.ex2.approx.f(float %0) + %res = call float @llvm.nvvm.ex2.approx.f32(float %0) ret float %res } @@ -31,6 +32,6 @@ define float @ex2_float_ftz(float %0) { ; CHECK-NEXT: ex2.approx.ftz.f32 %r2, %r1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; - %res = call float @llvm.nvvm.ex2.approx.ftz.f(float %0) + %res = call float @llvm.nvvm.ex2.approx.ftz.f32(float %0) ret float %res } diff --git a/llvm/test/CodeGen/RISCV/rv64-stackmap.ll b/llvm/test/CodeGen/RISCV/rv64-stackmap.ll index c3183a1..c50a0fb3 100644 --- a/llvm/test/CodeGen/RISCV/rv64-stackmap.ll +++ b/llvm/test/CodeGen/RISCV/rv64-stackmap.ll @@ -7,11 +7,11 @@ ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .half 0 ; Num Functions -; CHECK-NEXT: .word 12 +; CHECK-NEXT: .word 13 ; Num LargeConstants -; CHECK-NEXT: .word 2 +; CHECK-NEXT: .word 3 ; Num Callsites -; CHECK-NEXT: .word 16 +; CHECK-NEXT: .word 17 ; Functions and stack size ; CHECK-NEXT: .quad constantargs @@ -38,8 +38,8 @@ ; CHECK-NEXT: .quad liveConstant ; CHECK-NEXT: .quad 0 ; CHECK-NEXT: .quad 1 -; CHECK-NEXT: .quad spilledValue -; 
CHECK-NEXT: .quad 144 +; CHECK-NEXT: .quad liveArgs +; CHECK-NEXT: .quad 0 ; CHECK-NEXT: .quad 1 ; CHECK-NEXT: .quad directFrameIdx ; CHECK-NEXT: .quad 48 @@ -50,10 +50,14 @@ ; CHECK-NEXT: .quad needsStackRealignment ; CHECK-NEXT: .quad -1 ; CHECK-NEXT: .quad 1 +; CHECK-NEXT: .quad floats +; CHECK-NEXT: .quad 32 +; CHECK-NEXT: .quad 1 ; Num LargeConstants ; CHECK-NEXT: .quad 4294967295 ; CHECK-NEXT: .quad 4294967296 +; CHECK-NEXT: .quad 4609434218613702656 ; Constant arguments ; @@ -278,7 +282,7 @@ define void @liveConstant() { ; ; Verify 28 stack map entries. ; -; CHECK-LABEL: .word .L{{.*}}-spilledValue +; CHECK-LABEL: .word .L{{.*}}-liveArgs ; CHECK-NEXT: .half 0 ; CHECK-NEXT: .half 28 ; @@ -290,7 +294,7 @@ define void @liveConstant() { ; CHECK-NEXT: .half 2 ; CHECK-NEXT: .half 0 ; CHECK-NEXT: .word -define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i8 %l25, i16 zeroext %l26, i32 signext %l27) { +define void @liveArgs(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i8 %l25, i16 zeroext %l26, i32 signext %l27) { entry: call void (i64, i32, ptr, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 28, ptr null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i8 %l25, i16 %l26, i32 %l27) ret void @@ -379,6 +383,104 @@ define void @needsStackRealignment() { } declare void @escape_values(...) 
+; CHECK-LABEL: .word .L{{.*}}-floats +; CHECK-NEXT: .half 0 +; Num Locations +; CHECK-NEXT: .half 12 +; Loc 0: constant float as constant integer +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .half 8 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .word +; Loc 1: constant double as large constant integer +; CHECK-NEXT: .byte 5 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .half 8 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .word +; Loc 2: constant half as constant integer +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .half 8 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .word +; Loc 3: constant bfloat as constant integer +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .half 8 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .word +; Loc 4: float value in X register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .half 8 +; CHECK-NEXT: .half 10 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .word +; Loc 5: double value in X register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .half 8 +; CHECK-NEXT: .half 11 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .word +; Loc 6: half value in X register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .half 8 +; CHECK-NEXT: .half 12 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .word +; Loc 7: bfloat value in X register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .half 8 +; CHECK-NEXT: .half 13 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .word +; Loc 8: float on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .half 8 +; CHECK-NEXT: .half 2 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .word +; Loc 9: double on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .half 8 +; CHECK-NEXT: .half 2 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .word +; Loc 10: half on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .half 8 +; CHECK-NEXT: .half 2 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .word +; Loc 11: bfloat on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .half 8 +; CHECK-NEXT: .half 2 +; CHECK-NEXT: .half 0 +; CHECK-NEXT: .word +define void @floats(float %f, double %g, half %h, bfloat %i) { + %ff = alloca float + %gg = alloca double + %hh = alloca half + %ii = alloca bfloat + call void (i64, i32, ...) @llvm.experimental.stackmap(i64 888, i32 0, float 1.25, + double 1.5, half 1.5, bfloat 1.5, float %f, double %g, half %h, bfloat %i, ptr %ff, ptr %gg, ptr %hh, ptr %ii) + ret void +} + declare void @llvm.experimental.stackmap(i64, i32, ...) declare void @llvm.experimental.patchpoint.void(i64, i32, ptr, i32, ...) declare i64 @llvm.experimental.patchpoint.i64(i64, i32, ptr, i32, ...) 
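The new floats stackmap test above records each floating-point operand as its raw IEEE-754 bit pattern: per the check comments, the float, half and bfloat constants are emitted as plain constant locations (.byte 4 in the checks), while the double constant goes through the large-constant table (.byte 5), which is where the added LargeConstants entry .quad 4609434218613702656 comes from. As a minimal sketch for cross-checking those encodings (Python, not part of the patch):

import struct

def float_bits(x):
    # 32-bit IEEE-754 bit pattern of x
    return struct.unpack("<I", struct.pack("<f", x))[0]

def double_bits(x):
    # 64-bit IEEE-754 bit pattern of x
    return struct.unpack("<Q", struct.pack("<d", x))[0]

def half_bits(x):
    # 16-bit IEEE-754 (binary16) bit pattern of x
    return struct.unpack("<H", struct.pack("<e", x))[0]

print(double_bits(1.5))             # 4609434218613702656 == 0x3ff8000000000000
print(hex(float_bits(1.25)))        # 0x3fa00000
print(hex(half_bits(1.5)))          # 0x3e00
print(hex(float_bits(1.5) >> 16))   # 0x3fc0, bfloat16(1.5) by truncating the float bits

The first value matches the extra entry added to the LargeConstants section at the top of the file; the smaller patterns are the .word payloads that the CHECK lines above leave unconstrained.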
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 4c35b25..7e6f2c7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -15265,6 +15265,259 @@ define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) { ret <4 x i32> %x } +define <7 x i8> @mgather_baseidx_v7i8(ptr %base, <7 x i8> %idxs, <7 x i1> %m, <7 x i8> %passthru) { +; RV32-LABEL: mgather_baseidx_v7i8: +; RV32: # %bb.0: +; RV32-NEXT: li a1, 127 +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vmv.s.x v10, a1 +; RV32-NEXT: vmand.mm v0, v0, v10 +; RV32-NEXT: vsext.vf4 v10, v8 +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64V-LABEL: mgather_baseidx_v7i8: +; RV64V: # %bb.0: +; RV64V-NEXT: li a1, 127 +; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64V-NEXT: vmv.s.x v10, a1 +; RV64V-NEXT: vmand.mm v0, v0, v10 +; RV64V-NEXT: vsext.vf8 v12, v8 +; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t +; RV64V-NEXT: vmv1r.v v8, v9 +; RV64V-NEXT: ret +; +; RV64ZVE32F-LABEL: mgather_baseidx_v7i8: +; RV64ZVE32F: # %bb.0: +; RV64ZVE32F-NEXT: addi sp, sp, -16 +; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16 +; RV64ZVE32F-NEXT: .cfi_remember_state +; RV64ZVE32F-NEXT: li a1, 64 +; RV64ZVE32F-NEXT: addi a2, sp, 8 +; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV64ZVE32F-NEXT: vsm.v v0, (a2) +; RV64ZVE32F-NEXT: ld a1, 8(sp) +; RV64ZVE32F-NEXT: andi a2, a1, 1 +; RV64ZVE32F-NEXT: beqz a2, .LBB132_2 +; RV64ZVE32F-NEXT: # %bb.1: # %cond.load +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lbu a2, 0(a2) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 +; RV64ZVE32F-NEXT: vmv.v.x v11, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 +; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 3 +; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 +; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 5 +; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6 +; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v11, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2 +; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-NEXT: .LBB132_2: # %else +; RV64ZVE32F-NEXT: andi a2, a1, 2 +; RV64ZVE32F-NEXT: beqz a2, .LBB132_4 +; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2 +; RV64ZVE32F-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-NEXT: add a3, a0, a3 +; RV64ZVE32F-NEXT: lbu a3, 0(a3) +; RV64ZVE32F-NEXT: vmv.v.x v10, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v11 +; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3 +; RV64ZVE32F-NEXT: vmv.x.s a3, v11 +; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 4 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2 +; 
RV64ZVE32F-NEXT: vmv.x.s a2, v11 +; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 5 +; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3 +; RV64ZVE32F-NEXT: vmv.x.s a3, v11 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3 +; RV64ZVE32F-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2 +; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-NEXT: .LBB132_4: # %else2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2 +; RV64ZVE32F-NEXT: beqz a2, .LBB132_6 +; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 1 +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 3 +; RV64ZVE32F-NEXT: vmv.x.s a4, v11 +; RV64ZVE32F-NEXT: vmv.v.x v11, a3 +; RV64ZVE32F-NEXT: vmv.x.s a3, v12 +; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 4 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lbu a2, 0(a2) +; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4 +; RV64ZVE32F-NEXT: vmv.x.s a4, v12 +; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 5 +; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6 +; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v12 +; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v11, a2 +; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-NEXT: .LBB132_6: # %else5 +; RV64ZVE32F-NEXT: andi a2, a1, 8 +; RV64ZVE32F-NEXT: beqz a2, .LBB132_8 +; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 1 +; RV64ZVE32F-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 +; RV64ZVE32F-NEXT: vmv.x.s a4, v11 +; RV64ZVE32F-NEXT: vmv.v.x v11, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 +; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4 +; RV64ZVE32F-NEXT: vmv.x.s a4, v10 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 5 +; RV64ZVE32F-NEXT: add a3, a0, a3 +; RV64ZVE32F-NEXT: lbu a3, 0(a3) +; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6 +; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v11, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2 +; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-NEXT: .LBB132_8: # %else8 +; RV64ZVE32F-NEXT: andi a2, a1, 16 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 +; RV64ZVE32F-NEXT: bnez a2, .LBB132_13 +; RV64ZVE32F-NEXT: # %bb.9: # %else11 +; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: bnez a2, .LBB132_14 +; RV64ZVE32F-NEXT: .LBB132_10: # %else14 +; RV64ZVE32F-NEXT: andi a1, a1, 64 +; RV64ZVE32F-NEXT: beqz a1, .LBB132_12 +; RV64ZVE32F-NEXT: .LBB132_11: # %cond.load16 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, 
v9, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v9 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: vmv.v.x v8, a1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v10 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 +; RV64ZVE32F-NEXT: add a0, a0, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v10 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 +; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: lbu a0, 0(a0) +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vmv.x.s a1, v9 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 +; RV64ZVE32F-NEXT: .LBB132_12: # %else17 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vmv1r.v v8, v9 +; RV64ZVE32F-NEXT: addi sp, sp, 16 +; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 0 +; RV64ZVE32F-NEXT: ret +; RV64ZVE32F-NEXT: .LBB132_13: # %cond.load10 +; RV64ZVE32F-NEXT: .cfi_restore_state +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2 +; RV64ZVE32F-NEXT: vmv.x.s a4, v10 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.v.x v10, a3 +; RV64ZVE32F-NEXT: vmv.x.s a3, v11 +; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a4 +; RV64ZVE32F-NEXT: vmv.x.s a4, v11 +; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 5 +; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lbu a2, 0(a2) +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3 +; RV64ZVE32F-NEXT: vmv.x.s a3, v11 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3 +; RV64ZVE32F-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2 +; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: beqz a2, .LBB132_10 +; RV64ZVE32F-NEXT: .LBB132_14: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 1 +; RV64ZVE32F-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-NEXT: vmv.x.s a3, v10 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 +; RV64ZVE32F-NEXT: vmv.x.s a4, v11 +; RV64ZVE32F-NEXT: vmv.v.x v11, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 3 +; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4 +; RV64ZVE32F-NEXT: vmv.x.s a4, v10 +; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 +; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6 +; RV64ZVE32F-NEXT: add a3, a0, a3 +; RV64ZVE32F-NEXT: lbu a3, 0(a3) +; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v11, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3 +; RV64ZVE32F-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2 +; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 +; RV64ZVE32F-NEXT: andi a1, a1, 64 +; RV64ZVE32F-NEXT: bnez a1, .LBB132_11 +; RV64ZVE32F-NEXT: j .LBB132_12 + %ptrs = getelementptr inbounds i8, ptr %base, <7 x i8> %idxs + %v = call <7 x i8> @llvm.masked.gather.v7i8.v7p0(<7 x 
ptr> %ptrs, i32 1, <7 x i1> %m, <7 x i8> %passthru) + ret <7 x i8> %v +} + ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; RV32V-ZVFH: {{.*}} ; RV32V-ZVFHMIN: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/pr165232.ll b/llvm/test/CodeGen/RISCV/rvv/pr165232.ll new file mode 100644 index 0000000..bef53c6 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/pr165232.ll @@ -0,0 +1,244 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-linux-gnu" + +define i1 @main(ptr %var_117, ptr %arrayinit.element3045, ptr %arrayinit.element3047, ptr %arrayinit.element3049, ptr %arrayinit.element3051, ptr %arrayinit.element3053, ptr %arrayinit.element3055, ptr %arrayinit.element3057, ptr %arrayinit.element3059, ptr %arrayinit.element3061, ptr %arrayinit.element3063, ptr %arrayinit.element3065, ptr %arrayinit.element3067, i64 %var_94_i.07698, target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %1) { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr t0, vlenb +; CHECK-NEXT: slli t0, t0, 3 +; CHECK-NEXT: mv t1, t0 +; CHECK-NEXT: slli t0, t0, 1 +; CHECK-NEXT: add t0, t0, t1 +; CHECK-NEXT: sub sp, sp, t0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd a2, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs4r.v v12, (a1) # vscale x 32-byte Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: vs4r.v v16, (a1) # vscale x 32-byte Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: ld t0, 56(a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: ld t1, 48(a1) +; CHECK-NEXT: vsetvli t2, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: ld t2, 40(a1) +; CHECK-NEXT: # kill: def $v10 killed $v9 killed $vtype +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: ld t3, 32(a1) +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: ld t4, 16(a1) +; CHECK-NEXT: 
vmv.v.i v12, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: ld t5, 24(a1) +; CHECK-NEXT: vmv.v.i v13, 0 +; CHECK-NEXT: vsetvli t6, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v22, 0 +; CHECK-NEXT: vmv1r.v v14, v9 +; CHECK-NEXT: sd zero, 0(a0) +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmv1r.v v15, v9 +; CHECK-NEXT: vmv1r.v v18, v9 +; CHECK-NEXT: li t6, 1023 +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vmv1r.v v19, v9 +; CHECK-NEXT: slli t6, t6, 52 +; CHECK-NEXT: vmv.v.i v28, 0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs2r.v v22, (a1) # vscale x 16-byte Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: vs4r.v v24, (a1) # vscale x 32-byte Folded Spill +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: ld a2, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: vs2r.v v28, (a1) # vscale x 16-byte Folded Spill +; CHECK-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: vmv1r.v v20, v9 +; CHECK-NEXT: sd t6, 0(t5) +; CHECK-NEXT: vmv2r.v v16, v14 +; CHECK-NEXT: vmv2r.v v14, v12 +; CHECK-NEXT: vmv2r.v v12, v10 +; CHECK-NEXT: vmv1r.v v11, v9 +; CHECK-NEXT: vmv1r.v v21, v9 +; CHECK-NEXT: csrr t5, vlenb +; CHECK-NEXT: slli t5, t5, 3 +; CHECK-NEXT: add t5, sp, t5 +; CHECK-NEXT: addi t5, t5, 16 +; CHECK-NEXT: vs2r.v v18, (t5) # vscale x 16-byte Folded Spill +; CHECK-NEXT: csrr t6, vlenb +; CHECK-NEXT: slli t6, t6, 1 +; CHECK-NEXT: add t5, t5, t6 +; CHECK-NEXT: vs2r.v v20, (t5) # vscale x 16-byte Folded Spill +; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v19, 0 +; CHECK-NEXT: vmclr.m v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v6, 0 +; CHECK-NEXT: .LBB0_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmv1r.v v20, v19 +; CHECK-NEXT: vmv1r.v v3, v19 +; CHECK-NEXT: vmv1r.v v5, v19 +; CHECK-NEXT: vmv1r.v v2, v19 +; CHECK-NEXT: vmv1r.v v31, v19 +; CHECK-NEXT: vmv1r.v v30, v19 +; CHECK-NEXT: vmv1r.v v4, v19 +; CHECK-NEXT: vmv2r.v v22, v10 +; CHECK-NEXT: vmv4r.v v24, v12 +; CHECK-NEXT: vmv2r.v v28, v16 +; CHECK-NEXT: vmv2r.v v8, v6 +; CHECK-NEXT: vmv1r.v v18, v19 +; CHECK-NEXT: vmv1r.v v21, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma +; CHECK-NEXT: vle32.v v20, (t4) +; CHECK-NEXT: vle32.v v3, (t1) +; CHECK-NEXT: vle32.v v30, (a7) +; CHECK-NEXT: vle64.v v8, (a4) +; CHECK-NEXT: vle32.v v5, (t2) +; CHECK-NEXT: vle32.v v2, (t3) +; CHECK-NEXT: vle32.v v31, (a6) +; CHECK-NEXT: vmv1r.v v24, v30 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmflt.vv v21, v8, v6, v0.t +; CHECK-NEXT: vmv1r.v v8, v19 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu +; CHECK-NEXT: vle32.v v18, (a2) +; CHECK-NEXT: vle32.v v8, (a3) +; CHECK-NEXT: vle32.v v4, (a5) +; CHECK-NEXT: vmv1r.v v22, v20 +; CHECK-NEXT: csrr t5, vlenb +; CHECK-NEXT: slli t5, t5, 3 +; CHECK-NEXT: add t5, sp, t5 +; CHECK-NEXT: addi t5, t5, 16 +; CHECK-NEXT: vl1r.v v1, (t5) # vscale x 8-byte Folded Reload +; CHECK-NEXT: csrr t6, vlenb +; CHECK-NEXT: add t5, t5, t6 +; CHECK-NEXT: vl2r.v v2, (t5) # vscale x 16-byte Folded Reload +; CHECK-NEXT: slli t6, t6, 1 +; CHECK-NEXT: add t5, t5, t6 +; CHECK-NEXT: vl1r.v v4, (t5) # vscale x 8-byte Folded Reload +; CHECK-NEXT: vsseg4e32.v v1, (zero) +; CHECK-NEXT: vsseg8e32.v v22, (a1) +; CHECK-NEXT: vmv1r.v v0, v21 +; CHECK-NEXT: vssub.vv v8, 
v19, v18, v0.t +; CHECK-NEXT: csrr t5, vlenb +; CHECK-NEXT: slli t5, t5, 2 +; CHECK-NEXT: mv t6, t5 +; CHECK-NEXT: slli t5, t5, 1 +; CHECK-NEXT: add t5, t5, t6 +; CHECK-NEXT: add t5, sp, t5 +; CHECK-NEXT: addi t5, t5, 16 +; CHECK-NEXT: vl4r.v v20, (t5) # vscale x 32-byte Folded Reload +; CHECK-NEXT: vsetvli zero, t0, e64, m2, ta, ma +; CHECK-NEXT: vsseg2e64.v v20, (zero) +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: addi t5, sp, 16 +; CHECK-NEXT: vl4r.v v20, (t5) # vscale x 32-byte Folded Reload +; CHECK-NEXT: csrr t6, vlenb +; CHECK-NEXT: slli t6, t6, 2 +; CHECK-NEXT: add t5, t5, t6 +; CHECK-NEXT: vl4r.v v24, (t5) # vscale x 32-byte Folded Reload +; CHECK-NEXT: vsetivli zero, 0, e64, m2, ta, ma +; CHECK-NEXT: vsseg4e64.v v20, (zero), v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsseg8e32.v v8, (a0) +; CHECK-NEXT: csrr t5, vlenb +; CHECK-NEXT: slli t5, t5, 4 +; CHECK-NEXT: add t5, sp, t5 +; CHECK-NEXT: addi t5, t5, 16 +; CHECK-NEXT: vl4r.v v20, (t5) # vscale x 32-byte Folded Reload +; CHECK-NEXT: csrr t6, vlenb +; CHECK-NEXT: slli t6, t6, 2 +; CHECK-NEXT: add t5, t5, t6 +; CHECK-NEXT: vl4r.v v24, (t5) # vscale x 32-byte Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vsseg4e64.v v20, (zero) +; CHECK-NEXT: j .LBB0_1 +entry: + store double 0.000000e+00, ptr %var_117, align 8 + store double 1.000000e+00, ptr %arrayinit.element3061, align 8 + br label %for.body + +for.body: ; preds = %for.body, %entry + %2 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3059, i64 0) + %3 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3067, i64 0) + %4 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3065, i64 0) + %5 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3063, i64 0) + %6 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3055, i64 0) + %7 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3057, i64 0) + %8 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3053, i64 0) + %9 = call <vscale x 2 x double> @llvm.riscv.vle.nxv2f64.p0.i64(<vscale x 2 x double> zeroinitializer, ptr %arrayinit.element3051, i64 0) + %10 = tail call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.p0.i64(<vscale x 2 x i32> zeroinitializer, ptr %arrayinit.element3047, i64 0) + %11 = tail call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.p0.i64(<vscale x 2 x i32> zeroinitializer, ptr %arrayinit.element3049, i64 0) + call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv8i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) zeroinitializer, ptr null, i64 0, i64 5) + %12 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) zeroinitializer, <vscale x 2 x float> %8, i32 0) + %13 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %12, <vscale x 2 x float> %7, i32 2) + %14 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) 
@llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %13, <vscale x 2 x float> %6, i32 0) + %15 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %14, <vscale x 2 x float> %5, i32 0) + %16 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %15, <vscale x 2 x float> %4, i32 0) + %17 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %16, <vscale x 2 x float> %3, i32 0) + %18 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %17, <vscale x 2 x float> %2, i32 0) + call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %18, ptr %arrayinit.element3045, i64 0, i64 5) + %19 = tail call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64.nxv2f64.i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %9, <vscale x 2 x i1> zeroinitializer, i64 0) + %20 = tail call <vscale x 2 x i32> @llvm.riscv.vssub.mask.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> %11, <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> %10, <vscale x 2 x i1> %19, i64 0, i64 0) + call void @llvm.riscv.vsseg2.triscv.vector.tuple_nxv16i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, ptr null, i64 %var_94_i.07698, i64 6) + call void @llvm.riscv.vsseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv2i1.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) zeroinitializer, ptr null, <vscale x 2 x i1> zeroinitializer, i64 0, i64 6) + %21 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) poison, <vscale x 2 x i32> %20, i32 0) + call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %21, ptr %var_117, i64 0, i64 5) + call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv16i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %1, ptr null, i64 0, i64 6) + br label %for.body +} diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir index dd9960d..9c2fa9d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir @@ -32,10 +32,10 @@ body: | ; CHECK-NEXT: $x11 = ADDI $x2, 16 ; CHECK-NEXT: VS4R_V $v0m4, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s256>) into %stack.0, align 8) ; CHECK-NEXT: $x12 = PseudoReadVLENB - ; CHECK-NEXT: $x13 = SLLI $x12, 2 - ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13 + ; CHECK-NEXT: $x12 = SLLI killed $x12, 2 + ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 ; CHECK-NEXT: VS2R_V $v4m2, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s128>) into %stack.0, align 8) - ; CHECK-NEXT: $x12 = SLLI killed $x12, 1 + ; CHECK-NEXT: $x12 = SRLI killed $x12, 1 ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 ; CHECK-NEXT: VS1R_V $v6, killed $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0) ; 
CHECK-NEXT: $x11 = ADDI $x2, 16 @@ -93,10 +93,10 @@ body: | ; CHECK-NEXT: $x11 = ADDI $x2, 16 ; CHECK-NEXT: $v10m2 = VL2RE8_V $x11 :: (load (<vscale x 1 x s128>) from %stack.0, align 8) ; CHECK-NEXT: $x12 = PseudoReadVLENB - ; CHECK-NEXT: $x13 = SLLI $x12, 1 - ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13 + ; CHECK-NEXT: $x12 = SLLI killed $x12, 1 + ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 ; CHECK-NEXT: $v12m4 = VL4RE8_V $x11 :: (load (<vscale x 1 x s256>) from %stack.0, align 8) - ; CHECK-NEXT: $x12 = SLLI killed $x12, 2 + ; CHECK-NEXT: $x12 = SLLI killed $x12, 1 ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 ; CHECK-NEXT: $v16 = VL1RE8_V killed $x11 :: (load (<vscale x 1 x s64>) from %stack.0) ; CHECK-NEXT: VS1R_V killed $v10, killed renamable $x10 diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/issue-146942-ptr-cast.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/issue-146942-ptr-cast.ll index ed67344..4817e74 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/issue-146942-ptr-cast.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/issue-146942-ptr-cast.ll @@ -16,7 +16,6 @@ define void @case1() local_unnamed_addr { ; CHECK: %[[#BUFFER_LOAD:]] = OpLoad %[[#FLOAT4]] %{{[0-9]+}} Aligned 16 ; CHECK: %[[#CAST_LOAD:]] = OpBitcast %[[#INT4]] %[[#BUFFER_LOAD]] - ; CHECK: %[[#VEC_SHUFFLE:]] = OpVectorShuffle %[[#INT4]] %[[#CAST_LOAD]] %[[#CAST_LOAD]] 0 1 2 3 %1 = tail call target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v4f32_12_0t(i32 0, i32 2, i32 1, i32 0, ptr nonnull @.str) %2 = tail call target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v4i32_12_1t(i32 0, i32 5, i32 1, i32 0, ptr nonnull @.str.2) %3 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v4f32_12_0t(target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 0) %1, i32 0) @@ -29,8 +28,7 @@ define void @case1() local_unnamed_addr { define void @case2() local_unnamed_addr { ; CHECK: %[[#BUFFER_LOAD:]] = OpLoad %[[#FLOAT4]] %{{[0-9]+}} Aligned 16 ; CHECK: %[[#CAST_LOAD:]] = OpBitcast %[[#INT4]] %[[#BUFFER_LOAD]] - ; CHECK: %[[#VEC_SHUFFLE:]] = OpVectorShuffle %[[#INT4]] %[[#CAST_LOAD]] %[[#CAST_LOAD]] 0 1 2 3 - ; CHECK: %[[#VEC_TRUNCATE:]] = OpVectorShuffle %[[#INT3]] %[[#VEC_SHUFFLE]] %[[#UNDEF_INT4]] 0 1 2 + ; CHECK: %[[#VEC_TRUNCATE:]] = OpVectorShuffle %[[#INT3]] %[[#CAST_LOAD]] %[[#UNDEF_INT4]] 0 1 2 %1 = tail call target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v4f32_12_0t(i32 0, i32 2, i32 1, i32 0, ptr nonnull @.str) %2 = tail call target("spirv.VulkanBuffer", [0 x <3 x i32>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v3i32_12_1t(i32 0, i32 5, i32 1, i32 0, ptr nonnull @.str.3) %3 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v4f32_12_0t(target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 0) %1, i32 0) diff --git a/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll b/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll index 8491328..a1ec2cd 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll @@ -26,3 +26,25 @@ entry: store <4 x i32> %6, ptr addrspace(11) %7, align 16 ret void } + +; This tests a load from a pointer that has been bitcast between vector types +; which share the same total 
bit-width but have different numbers of elements. +; Tests that legalize-pointer-casts works correctly by moving the bitcast to +; the element that was loaded. + +define void @main2() local_unnamed_addr #0 { +entry: +; CHECK: %[[LOAD:[0-9]+]] = OpLoad %[[#v2_double]] {{.*}} +; CHECK: %[[BITCAST1:[0-9]+]] = OpBitcast %[[#v4_uint]] %[[LOAD]] +; CHECK: %[[BITCAST2:[0-9]+]] = OpBitcast %[[#v2_double]] %[[BITCAST1]] +; CHECK: OpStore {{%[0-9]+}} %[[BITCAST2]] {{.*}} + + %0 = tail call target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v2f64_12_1t(i32 0, i32 2, i32 1, i32 0, ptr nonnull @.str.2) + %2 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2f64_12_1t(target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) %0, i32 0) + %3 = load <4 x i32>, ptr addrspace(11) %2 + %4 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2f64_12_1t(target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) %0, i32 1) + store <4 x i32> %3, ptr addrspace(11) %4 + ret void +} + +attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } diff --git a/llvm/test/CodeGen/SystemZ/stackmap.ll b/llvm/test/CodeGen/SystemZ/stackmap.ll index 05b8de7..f414ea3 100644 --- a/llvm/test/CodeGen/SystemZ/stackmap.ll +++ b/llvm/test/CodeGen/SystemZ/stackmap.ll @@ -84,14 +84,14 @@ ; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 -; CHECK-NEXT: .long 65535 +; CHECK-NEXT: .long -1 ; SmallConstant ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 -; CHECK-NEXT: .long 65535 +; CHECK-NEXT: .long -1 ; SmallConstant ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 diff --git a/llvm/test/CodeGen/X86/amx-tf32-internal.ll b/llvm/test/CodeGen/X86/amx-tf32-internal.ll index 6d0f3c5..caf7a1c 100644 --- a/llvm/test/CodeGen/X86/amx-tf32-internal.ll +++ b/llvm/test/CodeGen/X86/amx-tf32-internal.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+avx512f, \ -; RUN: -mattr=+amx-tf32,+amx-transpose -verify-machineinstrs | FileCheck %s +; RUN: -mattr=+amx-tf32 -verify-machineinstrs | FileCheck %s define void @test_amx(i8* %pointer, i8* %base, i64 %stride) { ; CHECK-LABEL: test_amx: @@ -20,7 +20,6 @@ define void @test_amx(i8* %pointer, i8* %base, i64 %stride) { ; CHECK-NEXT: tilezero %tmm1 ; CHECK-NEXT: tilezero %tmm2 ; CHECK-NEXT: tmmultf32ps %tmm1, %tmm0, %tmm2 -; CHECK-NEXT: ttmmultf32ps %tmm1, %tmm0, %tmm2 ; CHECK-NEXT: tilestored %tmm2, (%rdi,%rdx) ; CHECK-NEXT: tilerelease ; CHECK-NEXT: vzeroupper @@ -31,9 +30,8 @@ define void @test_amx(i8* %pointer, i8* %base, i64 %stride) { %c = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8) %c1 = call x86_amx @llvm.x86.tmmultf32ps.internal(i16 8, i16 8, i16 8, x86_amx %c, x86_amx %a, x86_amx %b) - %c2 = call x86_amx @llvm.x86.ttmmultf32ps.internal(i16 8, i16 8, i16 8, x86_amx %c1, x86_amx %a, x86_amx %b) - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %c2) + call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %c1) ret void } @@ -43,4 +41,3 @@ declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx) declare x86_amx @llvm.x86.tmmultf32ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) -declare x86_amx 
@llvm.x86.ttmmultf32ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) diff --git a/llvm/test/CodeGen/X86/amx-tf32-intrinsics.ll b/llvm/test/CodeGen/X86/amx-tf32-intrinsics.ll index af1a7ae..642c1b7 100644 --- a/llvm/test/CodeGen/X86/amx-tf32-intrinsics.ll +++ b/llvm/test/CodeGen/X86/amx-tf32-intrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-tf32,+amx-transpose -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-tf32 -verify-machineinstrs | FileCheck %s define void @test_tmmultf32ps() { ; CHECK-LABEL: test_tmmultf32ps: @@ -11,13 +11,3 @@ define void @test_tmmultf32ps() { } declare void @llvm.x86.tmmultf32ps(i8 %A, i8 %B, i8 %C) -define void @test_ttmmultf32ps() { -; CHECK-LABEL: test_ttmmultf32ps: -; CHECK: # %bb.0: -; CHECK-NEXT: ttmmultf32ps %tmm3, %tmm2, %tmm1 -; CHECK-NEXT: retq - call void @llvm.x86.ttmmultf32ps(i8 1, i8 2, i8 3) - ret void -} -declare void @llvm.x86.ttmmultf32ps(i8 %A, i8 %B, i8 %C) - diff --git a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll b/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll deleted file mode 100755 index 1f5758c..0000000 --- a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll +++ /dev/null @@ -1,122 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O0 -; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O2 -; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR - -define void @test_amx(i64 %stride, i8* %addr1) #0 { -; CHECK-LABEL: test_amx: -; CHECK: # %bb.0: -; CHECK-NEXT: t2rpntlvwz0rs (%rsi,%rdi), %tmm0 -; CHECK-NEXT: t2rpntlvwz0rst1 (%rsi,%rdi), %tmm2 -; CHECK-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0 -; CHECK-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2 -; CHECK-NEXT: retq -; -; EGPR-LABEL: test_amx: -; EGPR: # %bb.0: -; EGPR-NEXT: t2rpntlvwz0rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x04,0x3e] -; EGPR-NEXT: t2rpntlvwz0rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x3e] -; EGPR-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x04,0x3e] -; EGPR-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x3e] -; EGPR-NEXT: retq # encoding: [0xc3] - call void @llvm.x86.t2rpntlvwz0rs(i8 1, i8* %addr1, i64 %stride) - call void @llvm.x86.t2rpntlvwz0rst1(i8 2, i8* %addr1, i64 %stride) - call void @llvm.x86.t2rpntlvwz1rs(i8 1, i8* %addr1, i64 %stride) - call void @llvm.x86.t2rpntlvwz1rst1(i8 2, i8* %addr1, i64 %stride) - ret void -} -declare void @llvm.x86.t2rpntlvwz0rs(i8 , i8* , i64 ) -declare void @llvm.x86.t2rpntlvwz0rst1(i8 , i8* , i64 ) -declare void @llvm.x86.t2rpntlvwz1rs(i8 , i8* , i64 ) -declare void @llvm.x86.t2rpntlvwz1rst1(i8 , i8* , i64 ) - -define void @test_amx2(i8* %base, i64 %stride) #0 { -; O0-LABEL: test_amx2: -; O0: # %bb.0: -; O0-NEXT: xorps %xmm0, %xmm0 -; O0-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) -; O0-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) -; O0-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) -; O0-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) -; O0-NEXT: 
movb $1, -{{[0-9]+}}(%rsp) -; O0-NEXT: movw $8, %ax -; O0-NEXT: # implicit-def: $al -; O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; O0-NEXT: # implicit-def: $al -; O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; O0-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) -; O0-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4 -; O0-NEXT: movw $8, %ax -; O0-NEXT: # implicit-def: $al -; O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; O0-NEXT: # implicit-def: $al -; O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; O0-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) -; O0-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4 -; O0-NEXT: movw $8, %ax -; O0-NEXT: # implicit-def: $al -; O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; O0-NEXT: # implicit-def: $al -; O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; O0-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) -; O0-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4 -; O0-NEXT: tilerelease -; O0-NEXT: retq -; -; O2-LABEL: test_amx2: -; O2: # %bb.0: -; O2-NEXT: xorps %xmm0, %xmm0 -; O2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) -; O2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) -; O2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) -; O2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) -; O2-NEXT: movb $1, -{{[0-9]+}}(%rsp) -; O2-NEXT: movb $8, -{{[0-9]+}}(%rsp) -; O2-NEXT: movw $8, -{{[0-9]+}}(%rsp) -; O2-NEXT: movb $8, -{{[0-9]+}}(%rsp) -; O2-NEXT: movw $8, -{{[0-9]+}}(%rsp) -; O2-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) -; O2-NEXT: movw $8, %ax -; O2-NEXT: t2rpntlvwz0rs (%rdi,%rsi), %tmm4 -; O2-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4 -; O2-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4 -; O2-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4 -; O2-NEXT: tilerelease -; O2-NEXT: retq -; -; EGPR-LABEL: test_amx2: -; EGPR: # %bb.0: -; EGPR-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] -; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xc0] -; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xd0] -; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xe0] -; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xf0] -; EGPR-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01] -; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08] -; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00] -; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x08] -; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x08,0x00] -; EGPR-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0] -; EGPR-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00] -; EGPR-NEXT: t2rpntlvwz0rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x24,0x37] -; EGPR-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x24,0x37] -; EGPR-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x24,0x37] -; EGPR-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x24,0x37] -; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0] -; EGPR-NEXT: retq # encoding: [0xc3] - call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) - call { x86_amx, x86_amx } 
@llvm.x86.t2rpntlvwz0rst1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) - call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) - call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rst1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) - ret void -} -declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16, i16, i16, i8*, i64) -declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal(i16, i16, i16, i8*, i64) -declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal(i16, i16, i16, i8*, i64) -declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rst1.internal(i16, i16, i16, i8*, i64) diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_O2_to_O0.ll b/llvm/test/CodeGen/X86/amx_tile_pair_O2_to_O0.ll deleted file mode 100644 index 4f41410..0000000 --- a/llvm/test/CodeGen/X86/amx_tile_pair_O2_to_O0.ll +++ /dev/null @@ -1,136 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \ -; RUN: -mattr=+amx-transpose -verify-machineinstrs | FileCheck %s - -@buf = dso_local global [2048 x i8] zeroinitializer, align 16 -@buf2 = dso_local global [2048 x i8] zeroinitializer, align 16 - -define dso_local void @test_tile_2rpntlvwz0(i16 noundef signext %row, i16 noundef signext %col0, i16 noundef signext %col1) local_unnamed_addr #0 { -; CHECK-LABEL: test_tile_2rpntlvwz0: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: andq $-1024, %rsp # imm = 0xFC00 -; CHECK-NEXT: subq $8192, %rsp # imm = 0x2000 -; CHECK-NEXT: .cfi_offset %rbx, -24 -; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $1, {{[0-9]+}}(%rsp) -; CHECK-NEXT: # kill: def $dx killed $dx killed $edx -; CHECK-NEXT: movw %si, %cx -; CHECK-NEXT: movw %di, %ax -; CHECK-NEXT: # implicit-def: $al -; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %dx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: # implicit-def: $al -; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %dx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: # implicit-def: $al -; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: # implicit-def: $cl -; CHECK-NEXT: movb %cl, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %dx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: # implicit-def: $al -; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: # implicit-def: $al -; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: # implicit-def: $al -; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: # implicit-def: $al -; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %dx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; CHECK-NEXT: movl $buf, %esi -; CHECK-NEXT: movl $32, %edi -; CHECK-NEXT: t2rpntlvwz0 (%rsi,%rdi), %tmm4 -; CHECK-NEXT: movabsq $64, %rbx -; CHECK-NEXT: tilestored %tmm5, (%rsp,%rbx) # 1024-byte Folded Spill -; CHECK-NEXT: tileloadd (%rsp,%rbx), %tmm0 # 1024-byte Folded Reload -; CHECK-NEXT: movabsq $64, %rbx -; CHECK-NEXT: tilestored %tmm4, 1024(%rsp,%rbx) # 1024-byte Folded Spill -; CHECK-NEXT: tileloadd 1024(%rsp,%rbx), %tmm1 # 1024-byte Folded Reload -; CHECK-NEXT: movl $64, %edi -; 
CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; CHECK-NEXT: tilestored %tmm1, (%rsi,%rdi) -; CHECK-NEXT: movl $64, %edi -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; CHECK-NEXT: tilestored %tmm0, (%rsi,%rdi) -; CHECK-NEXT: tilezero %tmm0 -; CHECK-NEXT: movl $64, %edi -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; CHECK-NEXT: tilestored %tmm0, (%rsi,%rdi) -; CHECK-NEXT: movl $64, %edi -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; CHECK-NEXT: tileloadd (%rsi,%rdi), %tmm1 -; CHECK-NEXT: movl $64, %edi -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; CHECK-NEXT: tileloadd (%rsi,%rdi), %tmm2 -; CHECK-NEXT: movl $64, %edi -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; CHECK-NEXT: tileloadd (%rsi,%rdi), %tmm0 -; CHECK-NEXT: tdpbssd %tmm2, %tmm1, %tmm0 -; CHECK-NEXT: movl $64, %edi -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; CHECK-NEXT: tilestored %tmm0, (%rsi,%rdi) -; CHECK-NEXT: movl $64, %edi -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; CHECK-NEXT: tileloadd (%rsi,%rdi), %tmm0 -; CHECK-NEXT: movl $buf2, %edx -; CHECK-NEXT: movl $32, %esi -; CHECK-NEXT: tilestored %tmm0, (%rdx,%rsi) -; CHECK-NEXT: leaq -8(%rbp), %rsp -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: .cfi_def_cfa %rsp, 8 -; CHECK-NEXT: tilerelease -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = tail call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 %row, i16 %col0, i16 %col1, ptr @buf, i64 32) #3 - %1 = extractvalue { x86_amx, x86_amx } %0, 0 - %2 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %1) #3 - %3 = extractvalue { x86_amx, x86_amx } %0, 1 - %4 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %3) #3 - %5 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col0) #3 - %6 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %5) #3 - %7 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %6) #3 - %8 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %2) #3 - %9 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %4) #3 - %10 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col1, i16 %col0, x86_amx %7, x86_amx %8, x86_amx %9) #3 - %11 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %10) #3 - %12 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %11) #3 - tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col0, ptr @buf2, i64 32, x86_amx %12) #3 - ret void -} - -declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16, i16, i16, ptr, i64) #1 - -declare <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx) #2 - -declare x86_amx @llvm.x86.tilezero.internal(i16, i16) #3 - -declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) #3 - -declare x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32>) #2 - -declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx) #4 - -attributes #0 = { nounwind uwtable "target-cpu"="x86-64" "target-features"="+amx-bf16,+amx-int8,+amx-tile,+amx-transpose" } -attributes #1 = { argmemonly nofree nounwind readonly } -attributes #2 = { nofree nosync nounwind readnone } -attributes #3 = { nounwind } -attributes #4 = { argmemonly nounwind writeonly } - -!llvm.module.flags = !{!0, !1, !2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"uwtable", i32 2} -!2 = !{i32 7, !"frame-pointer", i32 2} diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_configure_O0.mir b/llvm/test/CodeGen/X86/amx_tile_pair_configure_O0.mir deleted 
file mode 100644 index ab12ab3..0000000 --- a/llvm/test/CodeGen/X86/amx_tile_pair_configure_O0.mir +++ /dev/null @@ -1,165 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \ -# RUN: -mattr=+amx-transpose -run-pass=fasttileconfig -o - %s | FileCheck %s - ---- -name: test_tile_2rpntlvwz0 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -callsEHReturn: false -callsUnwindInit: false -hasEHContTarget: false -hasEHScopes: false -hasEHFunclets: false -failsVerification: false -tracksDebugUserValues: false -registers: [] -liveins: - - { reg: '$edi', virtual-reg: '' } - - { reg: '$esi', virtual-reg: '' } - - { reg: '$edx', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1024 - adjustsStack: false - hasCalls: true - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - localFrameSize: 0 - savePoint: [] - restorePoint: [] -fixedStack: [] -stack: - - { id: 0, name: '', type: default, offset: 0, size: 8, alignment: 8, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: default, offset: 0, size: 8, alignment: 8, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 2, name: '', type: default, offset: 0, size: 8, alignment: 8, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 3, name: '', type: default, offset: 0, size: 8, alignment: 8, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 4, name: '', type: default, offset: 0, size: 64, alignment: 4, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 5, name: '', type: spill-slot, offset: 0, size: 2, alignment: 2, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 6, name: '', type: spill-slot, offset: 0, size: 2, alignment: 2, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 7, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: - amxProgModel: ManagedRA -body: | - bb.0.entry: - liveins: $rdi, $rsi, $rdx, $rax - - ; CHECK-LABEL: name: test_tile_2rpntlvwz0 - ; CHECK: liveins: $rdi, $rsi, $rdx, $rax - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $zmm0 = AVX512_512_SET0 - ; 
CHECK-NEXT: VMOVUPSZmr %stack.4, 1, $noreg, 0, $noreg, killed renamable $zmm0 :: (store (s512) into %stack.4, align 4) - ; CHECK-NEXT: MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4) - ; CHECK-NEXT: renamable $rcx = MOV32ri64 64 - ; CHECK-NEXT: MOV64mr %stack.7, 1, $noreg, 0, $noreg, $rcx :: (store (s64) into %stack.7) - ; CHECK-NEXT: renamable $cx = MOV16ri 64 - ; CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 0, $noreg, $cx :: (store (s16) into %stack.5) - ; CHECK-NEXT: renamable $cx = MOV16ri 16 - ; CHECK-NEXT: renamable $r8w = MOV16ri 16 - ; CHECK-NEXT: MOV16mr %stack.6, 1, $noreg, 0, $noreg, $r8w :: (store (s16) into %stack.6) - ; CHECK-NEXT: $al = IMPLICIT_DEF - ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.4 + 48, align 4) - ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.4 + 16, align 4) - ; CHECK-NEXT: $al = IMPLICIT_DEF - ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 50, $noreg, $al :: (store (s512) into %stack.4 + 50, align 2, basealign 4) - ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 20, $noreg, $cx :: (store (s512) into %stack.4 + 20, align 4) - ; CHECK-NEXT: $al = IMPLICIT_DEF - ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 49, $noreg, $al :: (store (s512) into %stack.4 + 49, align 1, basealign 4) - ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 18, $noreg, $di :: (store (s512) into %stack.4 + 18, align 2, basealign 4) - ; CHECK-NEXT: $al = IMPLICIT_DEF - ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.4 + 48, align 4) - ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.4 + 16, align 4) - ; CHECK-NEXT: $al = IMPLICIT_DEF - ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.4 + 48, align 4) - ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.4 + 16, align 4) - ; CHECK-NEXT: $al = IMPLICIT_DEF - ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 52, $noreg, $al :: (store (s512) into %stack.4 + 52, align 4) - ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 24, $noreg, $cx :: (store (s512) into %stack.4 + 24, align 4) - ; CHECK-NEXT: $al = IMPLICIT_DEF - ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 53, $noreg, $al :: (store (s512) into %stack.4 + 53, align 1, basealign 4) - ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 26, $noreg, $di :: (store (s512) into %stack.4 + 26, align 2, basealign 4) - ; CHECK-NEXT: PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.4, align 4) - ; CHECK-NEXT: renamable $r9 = COPY $rsi - ; CHECK-NEXT: $rsi = MOV64rm %stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %stack.7) - ; CHECK-NEXT: renamable $r8 = COPY $rdi - ; CHECK-NEXT: $di = MOV16rm %stack.6, 1, $noreg, 0, $noreg :: (load (s16) from %stack.6) - ; CHECK-NEXT: renamable $r10 = COPY $rax - ; CHECK-NEXT: $ax = MOV16rm %stack.5, 1, $noreg, 0, $noreg :: (load (s16) from %stack.5) - ; CHECK-NEXT: renamable $tmm4_tmm5 = PT2RPNTLVWZ0V renamable $ax, renamable $cx, renamable $di, renamable $rdx, 1, killed renamable $r10, 0, $noreg - ; CHECK-NEXT: renamable $tmm0 = COPY renamable $tmm5 - ; CHECK-NEXT: renamable $tmm1 = COPY renamable $tmm4, implicit killed $tmm4_tmm5 - ; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $cx, renamable $r9, 1, renamable $rsi, 0, 
$noreg, killed renamable $tmm1 - ; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $di, renamable $r8, 1, renamable $rsi, 0, $noreg, killed renamable $tmm0 - ; CHECK-NEXT: renamable $tmm0 = PTILEZEROV renamable $ax, renamable $cx - ; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $cx, renamable $rdx, 1, renamable $rsi, 0, $noreg, killed renamable $tmm0 - ; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $r9, 1, renamable $rsi, 0, $noreg - ; CHECK-NEXT: renamable $tmm1 = PTILELOADDV renamable $ax, renamable $di, killed renamable $r8, 1, renamable $rsi, 0, $noreg - ; CHECK-NEXT: renamable $tmm2 = PTILELOADDV renamable $ax, renamable $cx, renamable $rdx, 1, renamable $rsi, 0, $noreg - ; CHECK-NEXT: renamable $tmm0 = PTDPBSSDV renamable $ax, renamable $cx, killed renamable $di, renamable $tmm0, killed renamable $tmm1, killed renamable $tmm2 - ; CHECK-NEXT: PTILESTOREDV killed renamable $ax, killed renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg, killed renamable $tmm0 - renamable $zmm0 = AVX512_512_SET0 - VMOVUPSZmr %stack.4, 1, $noreg, 0, $noreg, killed renamable $zmm0 :: (store (s512) into %stack.4, align 4) - MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4) - renamable $rcx = MOV32ri64 64 - MOV64mr %stack.7, 1, $noreg, 0, $noreg, $rcx :: (store (s64) into %stack.7) - renamable $cx = MOV16ri 64 - MOV16mr %stack.5, 1, $noreg, 0, $noreg, $cx :: (store (s16) into %stack.5) - renamable $cx = MOV16ri 16 - renamable $r8w = MOV16ri 16 - MOV16mr %stack.6, 1, $noreg, 0, $noreg, $r8w :: (store (s16) into %stack.6) - PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.4, align 4) - renamable $r9 = COPY $rsi - $rsi = MOV64rm %stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %stack.7) - renamable $r8 = COPY $rdi - $di = MOV16rm %stack.6, 1, $noreg, 0, $noreg :: (load (s16) from %stack.6) - renamable $r10 = COPY $rax - $ax = MOV16rm %stack.5, 1, $noreg, 0, $noreg :: (load (s16) from %stack.5) - renamable $tmm4_tmm5 = PT2RPNTLVWZ0V renamable $ax, renamable $cx, renamable $di, renamable $rdx, 1, killed renamable $r10, 0, $noreg - renamable $tmm0 = COPY renamable $tmm5 - renamable $tmm1 = COPY renamable $tmm4, implicit killed $tmm4_tmm5 - PTILESTOREDV renamable $ax, renamable $cx, renamable $r9, 1, renamable $rsi, 0, $noreg, killed renamable $tmm1 - PTILESTOREDV renamable $ax, renamable $di, renamable $r8, 1, renamable $rsi, 0, $noreg, killed renamable $tmm0 - renamable $tmm0 = PTILEZEROV renamable $ax, renamable $cx - PTILESTOREDV renamable $ax, renamable $cx, renamable $rdx, 1, renamable $rsi, 0, $noreg, killed renamable $tmm0 - renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $r9, 1, renamable $rsi, 0, $noreg - renamable $tmm1 = PTILELOADDV renamable $ax, renamable $di, killed renamable $r8, 1, renamable $rsi, 0, $noreg - renamable $tmm2 = PTILELOADDV renamable $ax, renamable $cx, renamable $rdx, 1, renamable $rsi, 0, $noreg - renamable $tmm0 = PTDPBSSDV renamable $ax, renamable $cx, killed renamable $di, renamable $tmm0, killed renamable $tmm1, killed renamable $tmm2 - PTILESTOREDV killed renamable $ax, killed renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg, killed renamable $tmm0 -... 
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_configure_O2.mir b/llvm/test/CodeGen/X86/amx_tile_pair_configure_O2.mir
deleted file mode 100644
index c7d241f..0000000
--- a/llvm/test/CodeGen/X86/amx_tile_pair_configure_O2.mir
+++ /dev/null
@@ -1,153 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \
-# RUN: -mattr=+amx-transpose -run-pass=greedy,tileconfig -o - %s | FileCheck %s
-
---- |
-  @buf = dso_local global [2048 x i8] zeroinitializer, align 16
-  @buf2 = dso_local global [2048 x i8] zeroinitializer, align 16
-
-  define dso_local void @test_tile_2rpntlvwz0(i16 noundef signext %row, i16 noundef signext %col0, i16 noundef signext %col1) local_unnamed_addr #0 {
-  entry:
-    %0 = tail call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 %row, i16 %col0, i16 %col1, i8* getelementptr inbounds ([2048 x i8], [2048 x i8]* @buf, i64 0, i64 0), i64 32) #5
-    %1 = extractvalue { x86_amx, x86_amx } %0, 0
-    %2 = extractvalue { x86_amx, x86_amx } %0, 1
-    %3 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col0) #5
-    %4 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col1, i16 %col0, x86_amx %3, x86_amx %1, x86_amx %2) #5
-    tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col0, i8* getelementptr inbounds ([2048 x i8], [2048 x i8]* @buf2, i64 0, i64 0), i64 32, x86_amx %4) #5
-    ret void
-  }
-
-  declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16, i16, i16, i8*, i64) #1
-
-  declare <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx) #2
-
-  declare x86_amx @llvm.x86.tilezero.internal(i16, i16) #3
-
-  declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) #3
-
-  declare x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32>) #2
-
-  declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx) #4
-
-  attributes #0 = { nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="8192" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+amx-bf16,+amx-int8,+amx-tile,+amx-transpose,+avx,+avx2,+avx512f,+crc32,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+amx-tile,+amx-bf16,+avx512f,+amx-transpose" "tune-cpu"="generic" }
-  attributes #1 = { argmemonly nounwind readonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
-  attributes #2 = { nounwind readnone "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
-  attributes #3 = { nounwind "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
-  attributes #4 = { argmemonly nounwind writeonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
-  attributes #5 = { nounwind }
-
-...
---- -name: test_tile_2rpntlvwz0 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -callsEHReturn: false -callsUnwindInit: false -hasEHContTarget: false -hasEHScopes: false -hasEHFunclets: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: gr32, preferred-register: '' } - - { id: 1, class: gr32, preferred-register: '' } - - { id: 2, class: gr32, preferred-register: '' } - - { id: 3, class: gr16, preferred-register: '' } - - { id: 4, class: gr16, preferred-register: '' } - - { id: 5, class: gr16, preferred-register: '' } - - { id: 6, class: gr64, preferred-register: '' } - - { id: 7, class: gr64_nosp, preferred-register: '' } - - { id: 8, class: tilepair, preferred-register: '' } - - { id: 9, class: tile, preferred-register: '' } - - { id: 10, class: tile, preferred-register: '' } - - { id: 11, class: tile, preferred-register: '' } - - { id: 12, class: tile, preferred-register: '' } - - { id: 13, class: gr64, preferred-register: '' } - - { id: 14, class: vr512, preferred-register: '' } -liveins: - - { reg: '$edi', virtual-reg: '%0' } - - { reg: '$esi', virtual-reg: '%1' } - - { reg: '$edx', virtual-reg: '%2' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - localFrameSize: 0 - savePoint: [] - restorePoint: [] -fixedStack: [] -stack: - - { id: 0, name: '', type: default, offset: 0, size: 64, alignment: 4, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: - amxProgModel: ManagedRA -body: | - bb.0.entry: - liveins: $edi, $esi, $edx - - - ; CHECK-LABEL: name: test_tile_2rpntlvwz0 - ; CHECK: liveins: $edi, $esi, $edx - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0 - ; CHECK-NEXT: VMOVUPSZmr %stack.0, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.0, align 4) - ; CHECK-NEXT: MOV8mi %stack.0, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.0, align 4) - ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 26, $noreg, [[COPY]].sub_16bit :: (store (s512) into %stack.0 + 26, align 2, basealign 4) - ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 53, $noreg, [[COPY2]].sub_8bit :: (store (s512) into %stack.0 + 53, align 1, basealign 4) - ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 24, $noreg, [[COPY1]].sub_16bit :: (store (s512) into %stack.0 + 24, align 4) - ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 52, $noreg, [[COPY2]].sub_8bit :: (store (s512) into %stack.0 + 52, align 4) - ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 16, $noreg, [[COPY]].sub_16bit :: (store (s512) into %stack.0 + 16, align 4) - ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 48, $noreg, [[COPY2]].sub_8bit :: (store (s512) into %stack.0 + 48, align 4) - ; CHECK-NEXT: PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def 
dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.0, align 4) - ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 @buf - ; CHECK-NEXT: [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32 - ; CHECK-NEXT: [[PT2RPNTLVWZ0V:%[0-9]+]]:tilepair = PT2RPNTLVWZ0V [[COPY2]].sub_16bit, [[COPY1]].sub_16bit, [[COPY]].sub_16bit, [[MOV32ri64_]], 1, [[MOV32ri64_1]], 0, $noreg - ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[COPY2]].sub_16bit, [[COPY1]].sub_16bit - ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTDPBSSDV [[COPY2]].sub_16bit, [[COPY]].sub_16bit, [[COPY1]].sub_16bit, [[PTILEZEROV]], [[PT2RPNTLVWZ0V]].sub_t0, [[PT2RPNTLVWZ0V]].sub_t1 - ; CHECK-NEXT: [[MOV32ri64_2:%[0-9]+]]:gr64 = MOV32ri64 @buf2 - ; CHECK-NEXT: PTILESTOREDV [[COPY2]].sub_16bit, [[COPY1]].sub_16bit, [[MOV32ri64_2]], 1, [[MOV32ri64_1]], 0, $noreg, [[PTILEZEROV]] - ; CHECK-NEXT: RET 0 - %2:gr32 = COPY $edx - %1:gr32 = COPY $esi - %0:gr32 = COPY $edi - %14:vr512 = AVX512_512_SET0 - VMOVUPSZmr %stack.0, 1, $noreg, 0, $noreg, %14 :: (store (s512) into %stack.0, align 4) - MOV8mi %stack.0, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.0, align 4) - PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.0, align 4) - %6:gr64 = MOV32ri64 @buf - %7:gr64_nosp = MOV32ri64 32 - %8:tilepair = PT2RPNTLVWZ0V %0.sub_16bit, %1.sub_16bit, %2.sub_16bit, %6, 1, %7, 0, $noreg - %12:tile = PTILEZEROV %0.sub_16bit, %1.sub_16bit - %12:tile = PTDPBSSDV %0.sub_16bit, %2.sub_16bit, %1.sub_16bit, %12, %8.sub_t0, %8.sub_t1 - %13:gr64 = MOV32ri64 @buf2 - PTILESTOREDV %0.sub_16bit, %1.sub_16bit, %13, 1, %7, 0, $noreg, %12 - RET 0 - -... 
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_copy.mir b/llvm/test/CodeGen/X86/amx_tile_pair_copy.mir deleted file mode 100644 index 66b15aa..0000000 --- a/llvm/test/CodeGen/X86/amx_tile_pair_copy.mir +++ /dev/null @@ -1,97 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \ -# RUN: -mattr=+amx-transpose -run-pass=lowertilecopy -o - %s | FileCheck %s - ---- -name: test_tile_2rpntlvwz0 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -callsEHReturn: false -callsUnwindInit: false -hasEHContTarget: false -hasEHScopes: false -hasEHFunclets: false -failsVerification: false -tracksDebugUserValues: false -registers: [] -liveins: - - { reg: '$edi', virtual-reg: '' } - - { reg: '$esi', virtual-reg: '' } - - { reg: '$edx', virtual-reg: '' } - - { reg: '$cx', virtual-reg: '' } - - { reg: '$r9', virtual-reg: '' } - - { reg: '$r10', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1024 - adjustsStack: false - hasCalls: true - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - localFrameSize: 0 - savePoint: [] - restorePoint: [] -fixedStack: [] -stack: - - { id: 43, name: '', type: default, offset: 0, size: 64, alignment: 4, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 68, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: - amxProgModel: ManagedRA -body: | - bb.0.entry: - liveins: $edi, $esi, $edx, $cx, $di, $r8w, $r11, $r10, $rbx, $r8, $r9 - - - ; CHECK-LABEL: name: test_tile_2rpntlvwz0 - ; CHECK: liveins: $edi, $esi, $edx, $cx, $di, $r8w, $r11, $r10, $rbx, $r8, $r9 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.0, align 4) - ; CHECK-NEXT: renamable $tmm4_tmm5 = PT2RPNTLVWZ0V killed renamable $cx, killed renamable $di, killed renamable $r8w, killed renamable $r11, 1, killed renamable $rbx, 0, $noreg - ; CHECK-NEXT: $rax = MOV64ri 64 - ; CHECK-NEXT: TILESTORED %stack.3, 1, $rax, 0, $noreg, $tmm5 :: (store (s8192) into %stack.3) - ; CHECK-NEXT: $tmm0 = TILELOADD %stack.3, 1, killed $rax, 0, $noreg :: (load (s8192) from %stack.3) - ; CHECK-NEXT: $rax = MOV64ri 64 - ; CHECK-NEXT: TILESTORED %stack.2, 1, $rax, 0, $noreg, $tmm4 :: (store (s8192) into %stack.2) - ; CHECK-NEXT: $tmm1 = TILELOADD %stack.2, 1, killed $rax, 0, $noreg :: (load (s8192) from %stack.2) - ; CHECK-NEXT: renamable $r8 = MOV32ri64 64 - ; CHECK-NEXT: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $r8 :: (store (s64) into %stack.1) - ; CHECK-NEXT: renamable $di = MOV16ri 64 - ; CHECK-NEXT: 
renamable $cx = MOV16ri 16 - ; CHECK-NEXT: PTILESTOREDV renamable $cx, renamable $di, killed renamable $r10, 1, renamable $r8, 0, $noreg, killed renamable $tmm1 - ; CHECK-NEXT: PTILESTOREDV killed renamable $cx, killed renamable $di, killed renamable $r9, 1, renamable $r8, 0, $noreg, killed renamable $tmm0 - PLDTILECFGV %stack.43, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.43, align 4) - renamable $tmm4_tmm5 = PT2RPNTLVWZ0V killed renamable $cx, killed renamable $di, killed renamable $r8w, killed renamable $r11, 1, killed renamable $rbx, 0, $noreg - renamable $tmm0 = COPY renamable $tmm5 - renamable $tmm1 = COPY renamable $tmm4, implicit killed $tmm4_tmm5 - renamable $r8 = MOV32ri64 64 - MOV64mr %stack.68, 1, $noreg, 0, $noreg, $r8 :: (store (s64) into %stack.68) - renamable $di = MOV16ri 64 - renamable $cx = MOV16ri 16 - PTILESTOREDV renamable $cx, renamable $di, killed renamable $r10, 1, renamable $r8, 0, $noreg, killed renamable $tmm1 - PTILESTOREDV killed renamable $cx, killed renamable $di, killed renamable $r9, 1, renamable $r8, 0, $noreg, killed renamable $tmm0 - -... diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll deleted file mode 100644 index 3549875..0000000 --- a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll +++ /dev/null @@ -1,87 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py - ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s - ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s - - @buf = dso_local global [2048 x i8] zeroinitializer, align 16 - - ; Function Attrs: noinline nounwind optnone uwtable - define dso_local void @test_tile_2rpntlvwz0(i16 noundef signext %row, i16 noundef signext %col0, i16 noundef signext %col1, ptr %m) #0 { -; CHECK-LABEL: @test_tile_2rpntlvwz0( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = udiv i16 [[COL1:%.*]], 4 -; CHECK-NEXT: [[TMP1:%.*]] = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 [[ROW:%.*]], i16 [[COL0:%.*]], i16 [[COL1]], ptr @buf, i64 32) #[[ATTR3:[0-9]+]] -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { x86_amx, x86_amx } [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[COL0]] to i64 -; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL0]], ptr [[M:%.*]], i64 [[TMP3]], x86_amx [[TMP2]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { x86_amx, x86_amx } [[TMP1]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[COL1]] to i64 -; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL1]], ptr [[M]], i64 [[TMP6]], x86_amx [[TMP5]]) -; CHECK-NEXT: [[TMP8:%.*]] = call x86_amx @llvm.x86.tilezero.internal(i16 [[ROW]], i16 [[COL0]]) #[[ATTR3]] -; CHECK-NEXT: [[TMP9:%.*]] = sext i16 [[COL0]] to i64 -; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL0]], ptr [[M]], i64 [[TMP9]], x86_amx [[TMP8]]) -; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[COL0]] to i64 -; CHECK-NEXT: [[TMP13:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL0]], ptr [[M]], i64 [[TMP11]]) -; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[COL1]] to i64 -; CHECK-NEXT: [[TMP16:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL1]], ptr [[M]], i64 [[TMP14]]) -; 
CHECK-NEXT: [[TMP17:%.*]] = sext i16 [[COL0]] to i64 -; CHECK-NEXT: [[TMP19:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[TMP0]], i16 [[COL0]], ptr [[M]], i64 [[TMP17]]) -; CHECK-NEXT: [[TMP20:%.*]] = call x86_amx @llvm.x86.tdpbssd.internal(i16 [[ROW]], i16 [[COL0]], i16 [[COL1]], x86_amx [[TMP13]], x86_amx [[TMP16]], x86_amx [[TMP19]]) #[[ATTR3]] -; CHECK-NEXT: [[TMP21:%.*]] = sext i16 [[COL0]] to i64 -; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL0]], ptr [[M]], i64 [[TMP21]], x86_amx [[TMP20]]) -; CHECK-NEXT: ret void -; - entry: - - %0 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 %row, i16 %col0, i16 %col1, ptr getelementptr inbounds ([2048 x i8], ptr @buf, i64 0, i64 0), i64 32) #7 - %1 = extractvalue { x86_amx, x86_amx } %0, 0 - %2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %1) #7 - store <256 x i32> %2, ptr %m, align 1024 - - %3 = extractvalue { x86_amx, x86_amx } %0, 1 - %4 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %3) #7 - store <256 x i32> %4, ptr %m, align 1024 - - %5 = call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col0) #7 - %6 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %5) #7 - store <256 x i32> %6, ptr %m, align 64 - - %7 = load <256 x i32>, ptr %m, align 64 - %8 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %7) #7 - %9 = load <256 x i32>, ptr %m, align 64 - %10 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %9) #7 - %11 = load <256 x i32>, ptr %m, align 64 - %12 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %11) #7 - - %13 = call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col0, i16 %col1, x86_amx %8, x86_amx %10, x86_amx %12) #7 - %14 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %13) #7 - store <256 x i32> %14, ptr %m, align 64 - - ret void - } - - ; Function Attrs: argmemonly nounwind readonly - declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16, i16, i16, ptr, i64) #2 - - ; Function Attrs: nounwind readnone - declare <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx) #3 - - ; Function Attrs: nounwind - declare x86_amx @llvm.x86.tilezero.internal(i16, i16) #4 - - ; Function Attrs: nounwind - declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) #4 - - ; Function Attrs: nounwind readnone - declare x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32>) #3 - - ; Function Attrs: argmemonly nounwind writeonly - declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx) #5 - - attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="8192" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+amx-bf16,+amx-int8,+amx-tile,+amx-transpose,+avx,+avx2,+avx512f,+crc32,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+amx-tile,+amx-bf16,+avx512f,+amx-transpose" "tune-cpu"="generic" } - attributes #1 = { argmemonly nofree nounwind willreturn writeonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } - attributes #2 = { argmemonly nounwind readonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } - attributes #3 = { nounwind readnone "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } - attributes #4 = { nounwind "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } - attributes #5 = { argmemonly nounwind writeonly 
"target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } - attributes #6 = { argmemonly nofree nounwind willreturn "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } - attributes #7 = { nounwind } diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll deleted file mode 100644 index 96966264..0000000 --- a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll +++ /dev/null @@ -1,61 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s -; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s - - @buf = dso_local global [2048 x i8] zeroinitializer, align 16 - @buf2 = dso_local global [2048 x i8] zeroinitializer, align 16 - - ; Function Attrs: nounwind uwtable - define dso_local void @test_tile_2rpntlvwz0(i16 noundef signext %row, i16 noundef signext %col0, i16 noundef signext %col1) local_unnamed_addr #0 { -; CHECK-LABEL: @test_tile_2rpntlvwz0( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 [[ROW:%.*]], i16 [[COL0:%.*]], i16 [[COL1:%.*]], ptr @buf, i64 32) #[[ATTR3:[0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { x86_amx, x86_amx } [[TMP0]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { x86_amx, x86_amx } [[TMP0]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_amx @llvm.x86.tilezero.internal(i16 [[ROW]], i16 [[COL0]]) #[[ATTR3]] -; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 [[ROW]], i16 [[COL1]], i16 [[COL0]], x86_amx [[TMP3]], x86_amx [[TMP1]], x86_amx [[TMP2]]) #[[ATTR3]] -; CHECK-NEXT: tail call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL0]], ptr @buf2, i64 32, x86_amx [[TMP4]]) #[[ATTR3]] -; CHECK-NEXT: ret void -; - entry: - %0 = tail call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 %row, i16 %col0, i16 %col1, ptr @buf, i64 32) #5 - %1 = extractvalue { x86_amx, x86_amx } %0, 0 - %2 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %1) #5 - %3 = extractvalue { x86_amx, x86_amx } %0, 1 - %4 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %3) #5 - %5 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col0) #5 - %6 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %5) #5 - %7 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %6) #5 - %8 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %2) #5 - %9 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %4) #5 - %10 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col1, i16 %col0, x86_amx %7, x86_amx %8, x86_amx %9) #5 - %11 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %10) #5 - %12 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %11) #5 - tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col0, ptr @buf2, i64 32, x86_amx %12) #5 - ret void - } - - ; Function Attrs: argmemonly nounwind readonly - declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16, i16, i16, ptr, i64) #1 - - ; Function Attrs: nounwind readnone - declare <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx) #2 - - ; Function Attrs: nounwind - declare x86_amx @llvm.x86.tilezero.internal(i16, i16) #3 - - ; Function Attrs: nounwind - declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, 
i16, x86_amx, x86_amx, x86_amx) #3 - - ; Function Attrs: nounwind readnone - declare x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32>) #2 - - ; Function Attrs: argmemonly nounwind writeonly - declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx) #4 - - attributes #0 = { nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="8192" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+amx-bf16,+amx-int8,+amx-tile,+amx-transpose,+avx,+avx2,+avx512f,+crc32,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+amx-tile,+amx-bf16,+avx512f,+amx-transpose" "tune-cpu"="generic" } - attributes #1 = { argmemonly nounwind readonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } - attributes #2 = { nounwind readnone "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } - attributes #3 = { nounwind "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } - attributes #4 = { argmemonly nounwind writeonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } - attributes #5 = { nounwind } diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O0.mir b/llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O0.mir deleted file mode 100644 index 1e3b242..0000000 --- a/llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O0.mir +++ /dev/null @@ -1,134 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \ -# RUN: -mattr=+amx-transpose -run-pass=fastpretileconfig -o - %s | FileCheck %s - ---- -name: test_tile_2rpntlvwz0 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -callsEHReturn: false -callsUnwindInit: false -hasEHContTarget: false -hasEHScopes: false -hasEHFunclets: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: gr64_nosp, preferred-register: '' } - - { id: 1, class: gr16, preferred-register: '' } - - { id: 2, class: gr16, preferred-register: '' } - - { id: 3, class: gr16, preferred-register: '' } - - { id: 4, class: gr64, preferred-register: '' } - - { id: 5, class: gr64, preferred-register: '' } - - { id: 6, class: gr64, preferred-register: '' } - - { id: 7, class: gr64_nosp, preferred-register: '' } - - { id: 8, class: tilepair, preferred-register: '' } - - { id: 9, class: tile, preferred-register: '' } - - { id: 10, class: tile, preferred-register: '' } - - { id: 11, class: tile, preferred-register: '' } - - { id: 181, class: tile, preferred-register: '' } - - { id: 183, class: tile, preferred-register: '' } - - { id: 185, class: tile, preferred-register: '' } - - { id: 186, class: tile, preferred-register: '' } -liveins: - - { reg: '$edi', virtual-reg: '%0' } - - { reg: '$esi', virtual-reg: '%1' } - - { reg: '$edx', virtual-reg: '%2' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1024 - adjustsStack: false - hasCalls: true - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - localFrameSize: 0 - savePoint: [] - restorePoint: [] -fixedStack: [] -stack: - - { id: 18, name: '', 
type: default, offset: 0, size: 8, alignment: 8, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 19, name: '', type: default, offset: 0, size: 8, alignment: 8, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 20, name: '', type: default, offset: 0, size: 8, alignment: 8, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 21, name: '', type: default, offset: 0, size: 8, - alignment: 8, stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: - amxProgModel: ManagedRA -body: | - bb.0.entry: - liveins: $rdi, $rsi, $rdx, $rax - - ; CHECK-LABEL: name: test_tile_2rpntlvwz0 - ; CHECK: liveins: $rdi, $rsi, $rdx, $rax - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0 - ; CHECK-NEXT: VMOVUPSZmr %stack.4, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.4, align 4) - ; CHECK-NEXT: MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4) - ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 64 - ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64 - ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.4, align 4) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rdx - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64_nosp = COPY $rax - ; CHECK-NEXT: [[PT2RPNTLVWZ0V:%[0-9]+]]:tilepair = PT2RPNTLVWZ0V [[MOV16ri]], [[MOV16ri1]], [[MOV16ri2]], [[COPY2]], 1, killed [[COPY3]], 0, $noreg - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:tile = COPY [[PT2RPNTLVWZ0V]].sub_t1 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:tile = COPY [[PT2RPNTLVWZ0V]].sub_t0 - ; CHECK-NEXT: PTILESTOREDV [[MOV16ri]], [[MOV16ri1]], [[COPY]], 1, [[MOV32ri64_]], 0, $noreg, killed [[COPY5]] - ; CHECK-NEXT: PTILESTOREDV [[MOV16ri]], [[MOV16ri2]], [[COPY1]], 1, [[MOV32ri64_]], 0, $noreg, killed [[COPY4]] - ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri]], [[MOV16ri1]] - ; CHECK-NEXT: PTILESTOREDV [[MOV16ri]], [[MOV16ri1]], [[COPY2]], 1, [[MOV32ri64_]], 0, $noreg, killed [[PTILEZEROV]] - ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri]], [[MOV16ri1]], [[COPY]], 1, [[MOV32ri64_]], 0, $noreg - ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri]], [[MOV16ri2]], [[COPY1]], 1, [[MOV32ri64_]], 0, $noreg - ; CHECK-NEXT: [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri]], [[MOV16ri1]], [[COPY2]], 1, [[MOV32ri64_]], 0, $noreg - ; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV [[MOV16ri]], [[MOV16ri1]], [[MOV16ri2]], [[PTILELOADDV]], killed [[PTILELOADDV1]], killed [[PTILELOADDV2]] - ; CHECK-NEXT: PTILESTOREDV killed [[MOV16ri]], killed [[MOV16ri1]], killed [[COPY2]], 1, killed [[MOV32ri64_]], 0, $noreg, killed 
[[PTDPBSSDV]] - %0:gr64_nosp = MOV32ri64 64 - %1:gr16 = MOV16ri 64 - %2:gr16 = MOV16ri 16 - %3:gr16 = MOV16ri 16 - %4:gr64 = COPY $rsi - %5:gr64 = COPY $rdi - %6:gr64 = COPY $rdx - %7:gr64_nosp = COPY $rax - %8:tilepair = PT2RPNTLVWZ0V %1, %2, %3, %6, 1, killed %7, 0, $noreg - %9:tile = COPY %8.sub_t1 - %10:tile = COPY %8.sub_t0 - PTILESTOREDV %1, %2, %4, 1, %0, 0, $noreg, killed %10 - PTILESTOREDV %1, %3, %5, 1, %0, 0, $noreg, killed %9 - %11:tile = PTILEZEROV %1, %2 - PTILESTOREDV %1, %2, %6, 1, %0, 0, $noreg, killed %11 - %181:tile = PTILELOADDV %1, %2, %4, 1, %0, 0, $noreg - %183:tile = PTILELOADDV %1, %3, %5, 1, %0, 0, $noreg - %185:tile = PTILELOADDV %1, %2, %6, 1, %0, 0, $noreg - %186:tile = PTDPBSSDV %1, %2, %3, %181, killed %183, killed %185 - PTILESTOREDV killed %1, killed %2, killed %6, 1, killed %0, 0, $noreg, killed %186 -... diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O2.mir b/llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O2.mir deleted file mode 100644 index ac2cdb4..0000000 --- a/llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O2.mir +++ /dev/null @@ -1,113 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \ -# RUN: -mattr=+amx-transpose -run-pass=tilepreconfig -o - %s | FileCheck %s - ---- -name: test_tile_2rpntlvwz0 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -callsEHReturn: false -callsUnwindInit: false -hasEHContTarget: false -hasEHScopes: false -hasEHFunclets: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: gr32, preferred-register: '' } - - { id: 1, class: gr32, preferred-register: '' } - - { id: 2, class: gr32, preferred-register: '' } - - { id: 3, class: gr16, preferred-register: '' } - - { id: 4, class: gr16, preferred-register: '' } - - { id: 5, class: gr16, preferred-register: '' } - - { id: 6, class: gr64, preferred-register: '' } - - { id: 7, class: gr64_nosp, preferred-register: '' } - - { id: 8, class: tilepair, preferred-register: '' } - - { id: 9, class: tile, preferred-register: '' } - - { id: 10, class: tile, preferred-register: '' } - - { id: 11, class: tile, preferred-register: '' } - - { id: 12, class: tile, preferred-register: '' } - - { id: 13, class: gr64, preferred-register: '' } -liveins: - - { reg: '$edi', virtual-reg: '%0' } - - { reg: '$esi', virtual-reg: '%1' } - - { reg: '$edx', virtual-reg: '%2' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - localFrameSize: 0 - savePoint: [] - restorePoint: [] -fixedStack: [] -stack: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: - amxProgModel: ManagedRA -body: | - bb.0.entry: - liveins: $edi, $esi, $edx, $rax, $rbx - - ; CHECK-LABEL: name: test_tile_2rpntlvwz0 - ; CHECK: liveins: $edi, $esi, $edx, $rax, $rbx - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0 - ; CHECK-NEXT: VMOVUPSZmr %stack.0, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store 
(s512) into %stack.0, align 4) - ; CHECK-NEXT: MOV8mi %stack.0, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.0, align 4) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY [[COPY1]].sub_16bit - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit - ; CHECK-NEXT: PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.0, align 4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax - ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 32 - ; CHECK-NEXT: [[PT2RPNTLVWZ0V:%[0-9]+]]:tilepair = PT2RPNTLVWZ0V [[COPY5]], [[COPY4]], [[COPY3]], killed [[COPY6]], 1, [[MOV32ri64_]], 0, $noreg - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:tile = COPY [[PT2RPNTLVWZ0V]].sub_t1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:tile = COPY [[PT2RPNTLVWZ0V]].sub_t0 - ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[COPY5]], [[COPY4]] - ; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV [[COPY5]], [[COPY3]], [[COPY4]], [[PTILEZEROV]], killed [[COPY8]], killed [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY $rbx - ; CHECK-NEXT: PTILESTOREDV [[COPY5]], [[COPY4]], killed [[COPY9]], 1, [[MOV32ri64_]], 0, $noreg, killed [[PTDPBSSDV]] - ; CHECK-NEXT: RET 0 - %2:gr32 = COPY $edx - %1:gr32 = COPY $esi - %0:gr32 = COPY $edi - %3:gr16 = COPY %2.sub_16bit - %4:gr16 = COPY %1.sub_16bit - %5:gr16 = COPY %0.sub_16bit - %6:gr64 = COPY $rax - %7:gr64_nosp = MOV32ri64 32 - %8:tilepair = PT2RPNTLVWZ0V %5, %4, %3, killed %6, 1, %7, 0, $noreg - %9:tile = COPY %8.sub_t1 - %10:tile = COPY %8.sub_t0 - %11:tile = PTILEZEROV %5, %4 - %12:tile = PTDPBSSDV %5, %3, %4, %11, killed %10, killed %9 - %13:gr64 = COPY $rbx - PTILESTOREDV %5, %4, killed %13, 1, %7, 0, $noreg, killed %12 - RET 0 - -... 
diff --git a/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll b/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll deleted file mode 100644 index 4cfd97a..0000000 --- a/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll +++ /dev/null @@ -1,371 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+amx-bf16,+amx-fp16,+amx-complex,+amx-transpose | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+amx-bf16,+amx-fp16,+amx-complex,+amx-transpose,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR - -define void @test_amx(i32 %rv32, i64 %stride, i64 %rvalue, i8* %addr1, <4 x float> %xmm) #0 { -; CHECK-LABEL: test_amx: -; CHECK: # %bb.0: -; CHECK-NEXT: t2rpntlvwz0 (%rcx,%rsi), %tmm0 -; CHECK-NEXT: t2rpntlvwz0t1 (%rcx,%rsi), %tmm2 -; CHECK-NEXT: t2rpntlvwz1 (%rcx,%rsi), %tmm0 -; CHECK-NEXT: t2rpntlvwz1t1 (%rcx,%rsi), %tmm2 -; CHECK-NEXT: ttransposed %tmm3, %tmm1 -; CHECK-NEXT: ttdpbf16ps %tmm3, %tmm2, %tmm1 -; CHECK-NEXT: ttdpfp16ps %tmm6, %tmm5, %tmm4 -; CHECK-NEXT: ttcmmimfp16ps %tmm3, %tmm2, %tmm1 -; CHECK-NEXT: ttcmmrlfp16ps %tmm3, %tmm2, %tmm1 -; CHECK-NEXT: tconjtcmmimfp16ps %tmm3, %tmm2, %tmm1 -; CHECK-NEXT: tconjtfp16 %tmm2, %tmm1 -; CHECK-NEXT: retq -; -; EGPR-LABEL: test_amx: -; EGPR: # %bb.0: -; EGPR-NEXT: t2rpntlvwz0 (%rcx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x04,0x31] -; EGPR-NEXT: t2rpntlvwz0t1 (%rcx,%rsi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x14,0x31] -; EGPR-NEXT: t2rpntlvwz1 (%rcx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x04,0x31] -; EGPR-NEXT: t2rpntlvwz1t1 (%rcx,%rsi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x31] -; EGPR-NEXT: ttransposed %tmm3, %tmm1 # encoding: [0xc4,0xe2,0x7a,0x5f,0xcb] -; EGPR-NEXT: ttdpbf16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x62,0x6c,0xca] -; EGPR-NEXT: ttdpfp16ps %tmm6, %tmm5, %tmm4 # encoding: [0xc4,0xe2,0x4b,0x6c,0xe5] -; EGPR-NEXT: ttcmmimfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x63,0x6b,0xca] -; EGPR-NEXT: ttcmmrlfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x62,0x6b,0xca] -; EGPR-NEXT: tconjtcmmimfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x60,0x6b,0xca] -; EGPR-NEXT: tconjtfp16 %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x79,0x6b,0xca] -; EGPR-NEXT: retq # encoding: [0xc3] - call void @llvm.x86.t2rpntlvwz0(i8 1, i8* %addr1, i64 %stride) - call void @llvm.x86.t2rpntlvwz0t1(i8 2, i8* %addr1, i64 %stride) - call void @llvm.x86.t2rpntlvwz1(i8 1, i8* %addr1, i64 %stride) - call void @llvm.x86.t2rpntlvwz1t1(i8 2, i8* %addr1, i64 %stride) - call void @llvm.x86.ttransposed(i8 1, i8 3) - call void @llvm.x86.ttdpbf16ps(i8 1, i8 2, i8 3) - call void @llvm.x86.ttdpfp16ps(i8 4, i8 5, i8 6) - call void @llvm.x86.ttcmmimfp16ps(i8 1, i8 2, i8 3) - call void @llvm.x86.ttcmmrlfp16ps(i8 1, i8 2, i8 3) - call void @llvm.x86.tconjtcmmimfp16ps(i8 1, i8 2, i8 3) - call void @llvm.x86.tconjtfp16(i8 1, i8 2) - ret void -} - -declare void @llvm.x86.t2rpntlvwz0(i8 %tile1, i8* %addr1, i64 %stride) -declare void @llvm.x86.t2rpntlvwz0t1(i8 %tile1, i8* %addr1, i64 %stride) -declare void @llvm.x86.t2rpntlvwz1(i8 %tile1, i8* %addr1, i64 %stride) -declare void @llvm.x86.t2rpntlvwz1t1(i8 %tile1, i8* %addr1, i64 %stride) -declare void @llvm.x86.ttransposed(i8 %tile0, i8 %tile1) -declare void @llvm.x86.ttdpbf16ps(i8 %tile0, i8 %tile1, i8 %tile2) -declare void @llvm.x86.ttdpfp16ps(i8 %tile0, i8 %tile1, i8 
%tile2) -declare void @llvm.x86.ttcmmimfp16ps(i8 %A, i8 %B, i8 %C) -declare void @llvm.x86.ttcmmrlfp16ps(i8 %A, i8 %B, i8 %C) -declare void @llvm.x86.tconjtcmmimfp16ps(i8 %A, i8 %B, i8 %C) -declare void @llvm.x86.tconjtfp16(i8 %A, i8 %B) - -define void @test_amx2(i8* %pointer, i8* %base, i64 %stride) #0 { -; CHECK-LABEL: test_amx2: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: subq $2928, %rsp # imm = 0xB70 -; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $1, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, %ax -; CHECK-NEXT: tileloadd (%rsi,%rdx), %tmm0 -; CHECK-NEXT: tilezero %tmm1 -; CHECK-NEXT: tilezero %tmm2 -; CHECK-NEXT: ttdpbf16ps %tmm1, %tmm0, %tmm2 -; CHECK-NEXT: ttdpfp16ps %tmm1, %tmm0, %tmm2 -; CHECK-NEXT: ttcmmimfp16ps %tmm1, %tmm0, %tmm2 -; CHECK-NEXT: ttcmmrlfp16ps %tmm1, %tmm0, %tmm2 -; CHECK-NEXT: movabsq $64, %rbp -; CHECK-NEXT: tilestored %tmm2, 896(%rsp,%rbp) # 1024-byte Folded Spill -; CHECK-NEXT: tileloadd 896(%rsp,%rbp), %tmm3 # 1024-byte Folded Reload -; CHECK-NEXT: tconjtcmmimfp16ps %tmm1, %tmm0, %tmm3 -; CHECK-NEXT: tconjtfp16 %tmm3, %tmm0 -; CHECK-NEXT: tilestored %tmm2, (%rdi,%rdx) -; CHECK-NEXT: addq $2928, %rsp # imm = 0xB70 -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: tilerelease -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; EGPR-LABEL: test_amx2: -; EGPR: # %bb.0: -; EGPR-NEXT: pushq %rbp # encoding: [0x55] -; EGPR-NEXT: subq $2928, %rsp # encoding: [0x48,0x81,0xec,0x70,0x0b,0x00,0x00] -; EGPR-NEXT: # imm = 0xB70 -; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] -; EGPR-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0x0d] -; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x40,0x03,0x00,0x00,0x01] -; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x70,0x03,0x00,0x00,0x08] -; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x50,0x03,0x00,0x00,0x08,0x00] -; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x71,0x03,0x00,0x00,0x08] -; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x52,0x03,0x00,0x00,0x08,0x00] -; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x72,0x03,0x00,0x00,0x08] -; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x54,0x03,0x00,0x00,0x08,0x00] -; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x73,0x03,0x00,0x00,0x08] -; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x56,0x03,0x00,0x00,0x08,0x00] -; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0x40,0x03,0x00,0x00] -; EGPR-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00] -; EGPR-NEXT: tileloadd (%rsi,%rdx), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x16] -; EGPR-NEXT: tilezero %tmm1 # encoding: [0xc4,0xe2,0x7b,0x49,0xc8] -; EGPR-NEXT: tilezero %tmm2 # encoding: [0xc4,0xe2,0x7b,0x49,0xd0] -; EGPR-NEXT: ttdpbf16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x72,0x6c,0xd0] -; EGPR-NEXT: ttdpfp16ps %tmm1, %tmm0, %tmm2 # encoding: 
[0xc4,0xe2,0x73,0x6c,0xd0] -; EGPR-NEXT: ttcmmimfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x6b,0xd0] -; EGPR-NEXT: ttcmmrlfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x72,0x6b,0xd0] -; EGPR-NEXT: movabsq $64, %rbp # encoding: [0x48,0xbd,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00] -; EGPR-NEXT: tilestored %tmm2, 896(%rsp,%rbp) # 1024-byte Folded Spill -; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x94,0x2c,0x80,0x03,0x00,0x00] -; EGPR-NEXT: tileloadd 896(%rsp,%rbp), %tmm3 # 1024-byte Folded Reload -; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x9c,0x2c,0x80,0x03,0x00,0x00] -; EGPR-NEXT: tconjtcmmimfp16ps %tmm1, %tmm0, %tmm3 # encoding: [0xc4,0xe2,0x70,0x6b,0xd8] -; EGPR-NEXT: tconjtfp16 %tmm3, %tmm0 # encoding: [0xc4,0xe2,0x79,0x6b,0xc3] -; EGPR-NEXT: tilestored %tmm2, (%rdi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x14,0x17] -; EGPR-NEXT: addq $2928, %rsp # encoding: [0x48,0x81,0xc4,0x70,0x0b,0x00,0x00] -; EGPR-NEXT: # imm = 0xB70 -; EGPR-NEXT: popq %rbp # encoding: [0x5d] -; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0] -; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; EGPR-NEXT: retq # encoding: [0xc3] - - %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride) - %b = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8) - %c = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8) - %c1 = call x86_amx @llvm.x86.ttdpbf16ps.internal(i16 8, i16 8, i16 8, x86_amx %c, x86_amx %a, x86_amx %b) - %c2 = call x86_amx @llvm.x86.ttdpfp16ps.internal(i16 8, i16 8, i16 8, x86_amx %c1, x86_amx %a, x86_amx %b) - %c3 = call x86_amx @llvm.x86.ttcmmimfp16ps.internal(i16 8, i16 8, i16 8, x86_amx %c2, x86_amx %a, x86_amx %b) - %c4 = call x86_amx @llvm.x86.ttcmmrlfp16ps.internal(i16 8, i16 8, i16 8, x86_amx %c3, x86_amx %a, x86_amx %b) - %c5 = call x86_amx @llvm.x86.tconjtcmmimfp16ps.internal(i16 8, i16 8, i16 8, x86_amx %c4, x86_amx %a, x86_amx %b) - %c6 = call x86_amx @llvm.x86.tconjtfp16.internal(i16 8, i16 8, x86_amx %c5) - - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %c4) - ret void -} - -define void @test_amx3(i8* %pointer, i8* %base, i64 %stride) #0 { -; CHECK-LABEL: test_amx3: -; CHECK: # %bb.0: -; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $1, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: movw $8, %cx -; CHECK-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 -; CHECK-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm4 -; CHECK-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm4 -; CHECK-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm4 -; CHECK-NEXT: ttransposed %tmm4, %tmm0 -; CHECK-NEXT: tilestored %tmm0, (%rdi,%rdx) -; CHECK-NEXT: tilerelease -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; EGPR-LABEL: test_amx3: -; EGPR: # %bb.0: -; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] -; EGPR-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0xff] -; EGPR-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01] -; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf0,0x08] -; EGPR-NEXT: movw $8, 
-{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd0,0x08,0x00] -; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08] -; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00] -; EGPR-NEXT: movb $0, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x00] -; EGPR-NEXT: movw $0, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x00,0x00] -; EGPR-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0] -; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] -; EGPR-NEXT: movw $8, %cx # encoding: [0x66,0xb9,0x08,0x00] -; EGPR-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x24,0x16] -; EGPR-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x24,0x16] -; EGPR-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x24,0x16] -; EGPR-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x24,0x16] -; EGPR-NEXT: ttransposed %tmm4, %tmm0 # encoding: [0xc4,0xe2,0x7a,0x5f,0xc4] -; EGPR-NEXT: tilestored %tmm0, (%rdi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x17] -; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0] -; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; EGPR-NEXT: retq # encoding: [0xc3] - %1 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride) - %2 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride) - %3 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride) - %4 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1t1.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride) - %5 = extractvalue { x86_amx, x86_amx } %4, 0 - %6 = call x86_amx @llvm.x86.ttransposed.internal(i16 8, i16 8, x86_amx %5) - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %6) - ret void -} - -define void @test_amx_spill(i8* %pointer, i8* %base, i64 %stride) #0 { -; CHECK-LABEL: test_amx_spill: -; CHECK: # %bb.0: -; CHECK-NEXT: subq $6088, %rsp # imm = 0x17C8 -; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $1, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, %ax -; CHECK-NEXT: tileloadd (%rsi,%rdx), %tmm0 -; CHECK-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 -; CHECK-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm6 -; CHECK-NEXT: movabsq $64, %rcx -; CHECK-NEXT: tilestored %tmm6, 4032(%rsp,%rcx) # 1024-byte Folded Spill -; CHECK-NEXT: tilestored %tmm7, 5056(%rsp,%rcx) # 1024-byte Folded Spill -; CHECK-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm6 -; CHECK-NEXT: tilestored %tmm6, 1984(%rsp,%rcx) # 1024-byte Folded Spill -; CHECK-NEXT: tilestored %tmm7, 3008(%rsp,%rcx) # 1024-byte Folded Spill -; CHECK-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm6 -; CHECK-NEXT: tilestored %tmm6, -64(%rsp,%rcx) # 1024-byte Folded Spill -; 
CHECK-NEXT: tilestored %tmm7, 960(%rsp,%rcx) # 1024-byte Folded Spill -; CHECK-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm6 -; CHECK-NEXT: tilestored %tmm4, (%rsi,%rdx) -; CHECK-NEXT: tilestored %tmm5, (%rsi,%rdx) -; CHECK-NEXT: tileloadd 4032(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload -; CHECK-NEXT: tileloadd 5056(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload -; CHECK-NEXT: tilestored %tmm4, (%rsi,%rdx) -; CHECK-NEXT: tilestored %tmm5, (%rsi,%rdx) -; CHECK-NEXT: tileloadd 1984(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload -; CHECK-NEXT: tileloadd 3008(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload -; CHECK-NEXT: tilestored %tmm4, (%rsi,%rdx) -; CHECK-NEXT: tilestored %tmm5, (%rsi,%rdx) -; CHECK-NEXT: tileloadd -64(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload -; CHECK-NEXT: tileloadd 960(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload -; CHECK-NEXT: tilestored %tmm4, (%rsi,%rdx) -; CHECK-NEXT: tilestored %tmm5, (%rsi,%rdx) -; CHECK-NEXT: tilestored %tmm6, (%rsi,%rdx) -; CHECK-NEXT: tilestored %tmm7, (%rsi,%rdx) -; CHECK-NEXT: addq $6088, %rsp # imm = 0x17C8 -; CHECK-NEXT: tilerelease -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; EGPR-LABEL: test_amx_spill: -; EGPR: # %bb.0: -; EGPR-NEXT: subq $6088, %rsp # encoding: [0x48,0x81,0xec,0xc8,0x17,0x00,0x00] -; EGPR-NEXT: # imm = 0x17C8 -; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] -; EGPR-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0xfe] -; EGPR-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x80,0x01] -; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb0,0x08] -; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x90,0x08,0x00] -; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb4,0x08] -; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x98,0x08,0x00] -; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb5,0x08] -; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9a,0x08,0x00] -; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb6,0x08] -; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9c,0x08,0x00] -; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb7,0x08] -; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9e,0x08,0x00] -; EGPR-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0x80] -; EGPR-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00] -; EGPR-NEXT: tileloadd (%rsi,%rdx), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x16] -; EGPR-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x24,0x16] -; EGPR-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x34,0x16] -; EGPR-NEXT: movabsq $64, %rcx # encoding: [0x48,0xb9,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00] -; EGPR-NEXT: tilestored %tmm6, 4032(%rsp,%rcx) # 1024-byte Folded Spill -; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xb4,0x0c,0xc0,0x0f,0x00,0x00] -; EGPR-NEXT: tilestored %tmm7, 5056(%rsp,%rcx) # 1024-byte Folded Spill -; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x13,0x00,0x00] -; EGPR-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x34,0x16] -; EGPR-NEXT: tilestored %tmm6, 1984(%rsp,%rcx) # 1024-byte Folded Spill -; EGPR-NEXT: # encoding: 
[0xc4,0xe2,0x7a,0x4b,0xb4,0x0c,0xc0,0x07,0x00,0x00] -; EGPR-NEXT: tilestored %tmm7, 3008(%rsp,%rcx) # 1024-byte Folded Spill -; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x0b,0x00,0x00] -; EGPR-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x34,0x16] -; EGPR-NEXT: tilestored %tmm6, -64(%rsp,%rcx) # 1024-byte Folded Spill -; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0x74,0x0c,0xc0] -; EGPR-NEXT: tilestored %tmm7, 960(%rsp,%rcx) # 1024-byte Folded Spill -; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x03,0x00,0x00] -; EGPR-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x34,0x16] -; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16] -; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16] -; EGPR-NEXT: tileloadd 4032(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload -; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xa4,0x0c,0xc0,0x0f,0x00,0x00] -; EGPR-NEXT: tileloadd 5056(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload -; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x13,0x00,0x00] -; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16] -; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16] -; EGPR-NEXT: tileloadd 1984(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload -; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xa4,0x0c,0xc0,0x07,0x00,0x00] -; EGPR-NEXT: tileloadd 3008(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload -; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x0b,0x00,0x00] -; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16] -; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16] -; EGPR-NEXT: tileloadd -64(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload -; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0x64,0x0c,0xc0] -; EGPR-NEXT: tileloadd 960(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload -; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x03,0x00,0x00] -; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16] -; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16] -; EGPR-NEXT: tilestored %tmm6, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x34,0x16] -; EGPR-NEXT: tilestored %tmm7, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x3c,0x16] -; EGPR-NEXT: addq $6088, %rsp # encoding: [0x48,0x81,0xc4,0xc8,0x17,0x00,0x00] -; EGPR-NEXT: # imm = 0x17C8 -; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0] -; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; EGPR-NEXT: retq # encoding: [0xc3] - %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride) - %b1 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) - %b2 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) - %b3 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) - %b4 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1t1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) - %b5 = call { x86_amx, 
x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride) - %e11 = extractvalue { x86_amx, x86_amx } %b1, 0 - %e12 = extractvalue { x86_amx, x86_amx } %b1, 1 - %e21 = extractvalue { x86_amx, x86_amx } %b2, 0 - %e22 = extractvalue { x86_amx, x86_amx } %b2, 1 - %e31 = extractvalue { x86_amx, x86_amx } %b3, 0 - %e32 = extractvalue { x86_amx, x86_amx } %b3, 1 - %e41 = extractvalue { x86_amx, x86_amx } %b4, 0 - %e42 = extractvalue { x86_amx, x86_amx } %b4, 1 - %e51 = extractvalue { x86_amx, x86_amx } %b5, 0 - %e52 = extractvalue { x86_amx, x86_amx } %b5, 1 - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e11) - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e12) - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e21) - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e22) - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e31) - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e32) - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e41) - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e42) - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e51) - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e52) - ret void -} - -declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64) -declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx) -declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16, i16, i16, i8*, i64) -declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal(i16, i16, i16, i8*, i64) -declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal(i16, i16, i16, i8*, i64) -declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1t1.internal(i16, i16, i16, i8*, i64) -declare x86_amx @llvm.x86.ttransposed.internal(i16, i16, x86_amx) -declare x86_amx @llvm.x86.ttdpbf16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) -declare x86_amx @llvm.x86.ttdpfp16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) -declare x86_amx @llvm.x86.ttcmmimfp16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) -declare x86_amx @llvm.x86.ttcmmrlfp16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) -declare x86_amx @llvm.x86.tconjtcmmimfp16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) -declare x86_amx @llvm.x86.tconjtfp16.internal(i16, i16, x86_amx) - -attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll index 06e7d47..8007d9d 100644 --- a/llvm/test/CodeGen/X86/bittest-big-integer.ll +++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll @@ -203,14 +203,24 @@ define i1 @init_eq_i32(ptr %word, i32 %position, i1 zeroext %value) nounwind { define i1 @test_ne_i64(ptr %word, i32 %position) nounwind { ; X86-LABEL: test_ne_i64: ; X86: # %bb.0: +; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: andl $32, %edx -; X86-NEXT: shrl $3, %edx -; X86-NEXT: movl (%eax,%edx), %eax -; X86-NEXT: btl %ecx, %eax -; X86-NEXT: setb %al +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: shldl %cl, %edx, %esi +; X86-NEXT: shll %cl, %edx 
+; X86-NEXT: testb $32, %cl +; X86-NEXT: je .LBB5_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %edx, %esi +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: .LBB5_2: +; X86-NEXT: andl 4(%eax), %esi +; X86-NEXT: andl (%eax), %edx +; X86-NEXT: orl %esi, %edx +; X86-NEXT: setne %al +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: test_ne_i64: @@ -232,20 +242,38 @@ define i1 @test_ne_i64(ptr %word, i32 %position) nounwind { define i1 @complement_ne_i64(ptr %word, i32 %position) nounwind { ; X86-LABEL: complement_ne_i64: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: andl $32, %esi -; X86-NEXT: shrl $3, %esi -; X86-NEXT: movl (%ecx,%esi), %edi -; X86-NEXT: btl %edx, %edi -; X86-NEXT: setb %al -; X86-NEXT: btcl %edx, %edi -; X86-NEXT: movl %edi, (%ecx,%esi) +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %eax +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: shll %cl, %eax +; X86-NEXT: testb $32, %cl +; X86-NEXT: je .LBB6_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: .LBB6_2: +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: movl 4(%edx), %edi +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: andl %esi, %ebx +; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: andl %eax, %ebp +; X86-NEXT: xorl %esi, %edi +; X86-NEXT: xorl %eax, %ecx +; X86-NEXT: orl %ebx, %ebp +; X86-NEXT: setne %al +; X86-NEXT: movl %ecx, (%edx) +; X86-NEXT: movl %edi, 4(%edx) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: complement_ne_i64: @@ -272,20 +300,40 @@ define i1 @complement_ne_i64(ptr %word, i32 %position) nounwind { define i1 @reset_eq_i64(ptr %word, i32 %position) nounwind { ; X86-LABEL: reset_eq_i64: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: andl $32, %esi -; X86-NEXT: shrl $3, %esi -; X86-NEXT: movl (%ecx,%esi), %edi -; X86-NEXT: btl %edx, %edi -; X86-NEXT: setae %al -; X86-NEXT: btrl %edx, %edi -; X86-NEXT: movl %edi, (%ecx,%esi) +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %esi +; X86-NEXT: xorl %edi, %edi +; X86-NEXT: shldl %cl, %esi, %edi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: testb $32, %cl +; X86-NEXT: je .LBB7_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %esi, %edi +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: .LBB7_2: +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: movl %ecx, %ebx +; X86-NEXT: andl %edi, %ebx +; X86-NEXT: notl %edi +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: andl %esi, %ebp +; X86-NEXT: notl %esi +; X86-NEXT: andl %ecx, %edi +; X86-NEXT: andl %eax, %esi +; X86-NEXT: orl %ebx, %ebp +; X86-NEXT: sete %al +; X86-NEXT: movl %esi, (%edx) +; X86-NEXT: movl %edi, 4(%edx) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: reset_eq_i64: @@ -313,20 +361,38 @@ define i1 @reset_eq_i64(ptr %word, i32 %position) nounwind { define i1 @set_ne_i64(ptr %word, i32 %position) nounwind { ; X86-LABEL: set_ne_i64: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), 
%edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: andl $32, %esi -; X86-NEXT: shrl $3, %esi -; X86-NEXT: movl (%ecx,%esi), %edi -; X86-NEXT: btl %edx, %edi -; X86-NEXT: setb %al -; X86-NEXT: btsl %edx, %edi -; X86-NEXT: movl %edi, (%ecx,%esi) +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %eax +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: shll %cl, %eax +; X86-NEXT: testb $32, %cl +; X86-NEXT: je .LBB8_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: .LBB8_2: +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: movl 4(%edx), %edi +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: andl %esi, %ebx +; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: andl %eax, %ebp +; X86-NEXT: orl %esi, %edi +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: orl %ebx, %ebp +; X86-NEXT: setne %al +; X86-NEXT: movl %ecx, (%edx) +; X86-NEXT: movl %edi, 4(%edx) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: set_ne_i64: @@ -353,47 +419,52 @@ define i1 @set_ne_i64(ptr %word, i32 %position) nounwind { define i1 @init_eq_i64(ptr %word, i32 %position, i1 zeroext %value) nounwind { ; X86-LABEL: init_eq_i64: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl $1, %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: shldl %cl, %edx, %esi -; X86-NEXT: shll %cl, %edx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: xorl %edi, %edi -; X86-NEXT: shldl %cl, %eax, %edi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: shldl %cl, %eax, %edx ; X86-NEXT: shll %cl, %eax +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi +; X86-NEXT: xorl %edi, %edi +; X86-NEXT: shldl %cl, %esi, %edi +; X86-NEXT: shll %cl, %esi ; X86-NEXT: testb $32, %cl ; X86-NEXT: je .LBB9_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl $0, %edx +; X86-NEXT: movl %eax, %edx +; X86-NEXT: movl $0, %eax ; X86-NEXT: .LBB9_2: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: notl %esi -; X86-NEXT: notl %edx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: notl %ebx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: notl %ebp ; X86-NEXT: je .LBB9_4 ; X86-NEXT: # %bb.3: -; X86-NEXT: movl %eax, %edi -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %esi, %edi +; X86-NEXT: xorl %esi, %esi ; X86-NEXT: .LBB9_4: -; X86-NEXT: andl 4(%ebx), %esi -; X86-NEXT: orl %edi, %esi -; X86-NEXT: andl (%ebx), %edx -; X86-NEXT: orl %eax, %edx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: andl $32, %eax -; X86-NEXT: shrl $3, %eax -; X86-NEXT: movl (%ebx,%eax), %eax -; X86-NEXT: btl %ecx, %eax -; X86-NEXT: setae %al -; X86-NEXT: movl %esi, 4(%ebx) -; X86-NEXT: movl %edx, (%ebx) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl 4(%ecx), %ecx +; X86-NEXT: andl %ecx, %edx +; X86-NEXT: andl %ecx, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl (%edi), %ecx +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: andl %ecx, %ebp +; X86-NEXT: orl %esi, %ebp +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl %ebp, (%edi) +; X86-NEXT: movl %ebx, 4(%edi) +; X86-NEXT: sete %al ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; SSE-LABEL: init_eq_i64: @@ -445,25 +516,101 @@ define i1 @init_eq_i64(ptr %word, i32 %position, i1 zeroext %value) nounwind { define i1 @test_ne_i128(ptr %word, i32 %position) nounwind { ; 
X86-LABEL: test_ne_i128: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: andl $96, %edx -; X86-NEXT: shrl $3, %edx -; X86-NEXT: movl (%eax,%edx), %eax -; X86-NEXT: btl %ecx, %eax -; X86-NEXT: setb %al +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $48, %esp +; X86-NEXT: movzbl 12(%ebp), %ecx +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, (%esp) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %esi +; X86-NEXT: movl 24(%esp,%esi), %edi +; X86-NEXT: movl 28(%esp,%esi), %eax +; X86-NEXT: shldl %cl, %edi, %eax +; X86-NEXT: movl 16(%esp,%esi), %edx +; X86-NEXT: movl 20(%esp,%esi), %esi +; X86-NEXT: shldl %cl, %esi, %edi +; X86-NEXT: shldl %cl, %edx, %esi +; X86-NEXT: movl 8(%ebp), %ebx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: andl 8(%ebx), %edi +; X86-NEXT: andl (%ebx), %edx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: andl 12(%ebx), %eax +; X86-NEXT: andl 4(%ebx), %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: setne %al +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; -; X64-LABEL: test_ne_i128: -; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: andl $96, %eax -; X64-NEXT: shrl $3, %eax -; X64-NEXT: movl (%rdi,%rax), %eax -; X64-NEXT: btl %esi, %eax -; X64-NEXT: setb %al -; X64-NEXT: retq +; SSE-LABEL: test_ne_i128: +; SSE: # %bb.0: +; SSE-NEXT: movl %esi, %ecx +; SSE-NEXT: movl $1, %eax +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: shldq %cl, %rax, %rdx +; SSE-NEXT: xorl %esi, %esi +; SSE-NEXT: shlq %cl, %rax +; SSE-NEXT: testb $64, %cl +; SSE-NEXT: cmovneq %rax, %rdx +; SSE-NEXT: cmovneq %rsi, %rax +; SSE-NEXT: andq 8(%rdi), %rdx +; SSE-NEXT: andq (%rdi), %rax +; SSE-NEXT: orq %rdx, %rax +; SSE-NEXT: setne %al +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ne_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: movl %esi, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: movl $1, %edx +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: shldq %cl, %rdx, %rsi +; AVX2-NEXT: shlxq %rcx, %rdx, %rdx +; AVX2-NEXT: testb $64, %cl +; AVX2-NEXT: cmovneq %rdx, %rsi +; AVX2-NEXT: cmovneq %rax, %rdx +; AVX2-NEXT: andq 8(%rdi), %rsi +; AVX2-NEXT: andq (%rdi), %rdx +; AVX2-NEXT: orq %rsi, %rdx +; AVX2-NEXT: setne %al +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ne_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: movl %esi, %ecx +; AVX512-NEXT: movl $1, %eax +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: shldq %cl, %rax, %rdx +; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: shlxq %rcx, %rax, %rax +; AVX512-NEXT: testb $64, %cl +; AVX512-NEXT: cmovneq %rax, %rdx +; AVX512-NEXT: cmovneq %rsi, %rax +; AVX512-NEXT: andq 8(%rdi), %rdx +; AVX512-NEXT: andq (%rdi), %rax +; AVX512-NEXT: orq %rdx, %rax +; AVX512-NEXT: setne %al +; AVX512-NEXT: retq %rem = and i32 %position, 127 %ofs = zext nneg i32 %rem to i128 %bit = shl nuw i128 1, %ofs @@ -476,33 +623,124 @@ define i1 @test_ne_i128(ptr %word, i32 %position) nounwind { define i1 @complement_ne_i128(ptr %word, i32 %position) nounwind { ; 
X86-LABEL: complement_ne_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: andl $96, %esi -; X86-NEXT: shrl $3, %esi -; X86-NEXT: movl (%ecx,%esi), %edi -; X86-NEXT: btl %edx, %edi -; X86-NEXT: setb %al -; X86-NEXT: btcl %edx, %edi -; X86-NEXT: movl %edi, (%ecx,%esi) +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $80, %esp +; X86-NEXT: movzbl 12(%ebp), %ecx +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %eax +; X86-NEXT: movl 56(%esp,%eax), %esi +; X86-NEXT: movl 60(%esp,%eax), %edx +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 48(%esp,%eax), %edi +; X86-NEXT: movl 52(%esp,%eax), %ebx +; X86-NEXT: shldl %cl, %ebx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edi, %ebx +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl 8(%eax), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %eax +; X86-NEXT: movl (%ecx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: andl %edi, %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl 12(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ebx, %eax +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: movl %esi, 12(%eax) +; X86-NEXT: movl %edi, (%eax) +; X86-NEXT: movl %ebx, 4(%eax) +; X86-NEXT: setne %al +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; -; X64-LABEL: complement_ne_i128: -; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: andl $96, %ecx -; X64-NEXT: shrl $3, %ecx -; X64-NEXT: movl (%rdi,%rcx), %edx -; X64-NEXT: btl %esi, %edx -; X64-NEXT: setb %al -; X64-NEXT: btcl %esi, %edx -; X64-NEXT: movl %edx, (%rdi,%rcx) -; X64-NEXT: retq +; SSE-LABEL: complement_ne_i128: +; SSE: # %bb.0: +; SSE-NEXT: movl %esi, %ecx +; SSE-NEXT: movl $1, %edx +; SSE-NEXT: xorl %esi, %esi +; SSE-NEXT: shldq %cl, %rdx, %rsi +; SSE-NEXT: shlq %cl, %rdx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: testb $64, %cl +; SSE-NEXT: cmovneq %rdx, %rsi +; SSE-NEXT: cmovneq %rax, %rdx +; 
SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: movq %rcx, %r8 +; SSE-NEXT: andq %rsi, %r8 +; SSE-NEXT: movq %rax, %r9 +; SSE-NEXT: andq %rdx, %r9 +; SSE-NEXT: xorq %rcx, %rsi +; SSE-NEXT: xorq %rax, %rdx +; SSE-NEXT: orq %r8, %r9 +; SSE-NEXT: setne %al +; SSE-NEXT: movq %rdx, (%rdi) +; SSE-NEXT: movq %rsi, 8(%rdi) +; SSE-NEXT: retq +; +; AVX-LABEL: complement_ne_i128: +; AVX: # %bb.0: +; AVX-NEXT: movl %esi, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: movl $1, %edx +; AVX-NEXT: xorl %esi, %esi +; AVX-NEXT: shldq %cl, %rdx, %rsi +; AVX-NEXT: shlxq %rcx, %rdx, %rdx +; AVX-NEXT: testb $64, %cl +; AVX-NEXT: cmovneq %rdx, %rsi +; AVX-NEXT: cmovneq %rax, %rdx +; AVX-NEXT: movq (%rdi), %rax +; AVX-NEXT: movq 8(%rdi), %rcx +; AVX-NEXT: movq %rcx, %r8 +; AVX-NEXT: andq %rsi, %r8 +; AVX-NEXT: movq %rax, %r9 +; AVX-NEXT: andq %rdx, %r9 +; AVX-NEXT: xorq %rcx, %rsi +; AVX-NEXT: xorq %rax, %rdx +; AVX-NEXT: orq %r8, %r9 +; AVX-NEXT: setne %al +; AVX-NEXT: movq %rdx, (%rdi) +; AVX-NEXT: movq %rsi, 8(%rdi) +; AVX-NEXT: retq %rem = and i32 %position, 127 %ofs = zext nneg i32 %rem to i128 %bit = shl nuw i128 1, %ofs @@ -517,33 +755,124 @@ define i1 @complement_ne_i128(ptr %word, i32 %position) nounwind { define i1 @reset_eq_i128(ptr %word, i32 %position) nounwind { ; X86-LABEL: reset_eq_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: andl $96, %esi -; X86-NEXT: shrl $3, %esi -; X86-NEXT: movl (%ecx,%esi), %edi -; X86-NEXT: btl %edx, %edi -; X86-NEXT: setae %al -; X86-NEXT: btrl %edx, %edi -; X86-NEXT: movl %edi, (%ecx,%esi) +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $80, %esp +; X86-NEXT: movzbl 12(%ebp), %ecx +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %eax +; X86-NEXT: movl 56(%esp,%eax), %edx +; X86-NEXT: movl 60(%esp,%eax), %esi +; X86-NEXT: shldl %cl, %edx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 48(%esp,%eax), %esi +; X86-NEXT: movl 52(%esp,%eax), %edi +; X86-NEXT: shldl %cl, %edi, %edx +; X86-NEXT: shldl %cl, %esi, %edi +; X86-NEXT: movl 8(%ebp), %ebx +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl 8(%ebx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edx, %eax +; X86-NEXT: movl (%ebx), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 12(%ebx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: movl 4(%ebx), %ebx +; X86-NEXT: andl %ebx, %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: notl %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: notl %ecx +; X86-NEXT: andl %ebx, %ecx +; X86-NEXT: notl %esi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl 8(%ebp), %edi +; X86-NEXT: movl %edx, 8(%edi) +; X86-NEXT: movl %eax, 12(%edi) +; X86-NEXT: movl %esi, (%edi) +; X86-NEXT: movl %ecx, 4(%edi) +; X86-NEXT: sete %al +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; -; X64-LABEL: reset_eq_i128: -; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: andl $96, %ecx -; X64-NEXT: shrl $3, %ecx -; X64-NEXT: movl (%rdi,%rcx), %edx -; X64-NEXT: btl %esi, %edx -; X64-NEXT: setae %al -; X64-NEXT: btrl %esi, %edx -; X64-NEXT: movl %edx, (%rdi,%rcx) -; X64-NEXT: retq +; SSE-LABEL: reset_eq_i128: +; SSE: # %bb.0: +; SSE-NEXT: movl %esi, %ecx +; SSE-NEXT: movl $1, %edx +; SSE-NEXT: xorl %esi, %esi +; SSE-NEXT: shldq %cl, %rdx, %rsi +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: shlq %cl, %rdx +; SSE-NEXT: testb $64, %cl +; SSE-NEXT: cmovneq %rdx, %rsi +; SSE-NEXT: cmovneq %rax, %rdx +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: movq %rcx, %r8 +; SSE-NEXT: andq %rsi, %r8 +; SSE-NEXT: notq %rsi +; SSE-NEXT: movq %rax, %r9 +; SSE-NEXT: andq %rdx, %r9 +; SSE-NEXT: notq %rdx +; SSE-NEXT: andq %rcx, %rsi +; SSE-NEXT: andq %rax, %rdx +; SSE-NEXT: orq %r8, %r9 +; SSE-NEXT: sete %al +; SSE-NEXT: movq %rdx, (%rdi) +; SSE-NEXT: movq %rsi, 8(%rdi) +; SSE-NEXT: retq +; +; AVX-LABEL: reset_eq_i128: +; AVX: # %bb.0: +; AVX-NEXT: movl %esi, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: movl $1, %edx +; AVX-NEXT: xorl %esi, %esi +; AVX-NEXT: shldq %cl, %rdx, %rsi +; AVX-NEXT: shlxq %rcx, %rdx, %rdx +; AVX-NEXT: testb $64, %cl +; AVX-NEXT: cmovneq %rdx, %rsi +; AVX-NEXT: cmovneq %rax, %rdx +; AVX-NEXT: movq (%rdi), %rax +; AVX-NEXT: movq 8(%rdi), %rcx +; AVX-NEXT: andnq %rcx, %rsi, %r8 +; AVX-NEXT: andq %rsi, %rcx +; AVX-NEXT: andnq %rax, %rdx, %rsi +; AVX-NEXT: andq %rdx, %rax +; AVX-NEXT: orq %rcx, %rax +; AVX-NEXT: sete %al +; AVX-NEXT: movq %rsi, (%rdi) +; AVX-NEXT: movq %r8, 8(%rdi) +; AVX-NEXT: retq %rem = and i32 %position, 127 %ofs = zext nneg i32 %rem to i128 %bit = shl nuw i128 1, %ofs @@ -559,33 +888,124 @@ define i1 @reset_eq_i128(ptr %word, i32 %position) nounwind { define i1 @set_ne_i128(ptr %word, i32 %position) nounwind { ; X86-LABEL: set_ne_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: andl $96, %esi -; X86-NEXT: shrl $3, %esi -; X86-NEXT: movl (%ecx,%esi), %edi -; X86-NEXT: btl %edx, %edi -; X86-NEXT: setb %al -; X86-NEXT: btsl %edx, %edi -; X86-NEXT: movl %edi, (%ecx,%esi) +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $80, %esp +; X86-NEXT: movzbl 12(%ebp), %ecx +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %eax +; X86-NEXT: movl 
56(%esp,%eax), %esi +; X86-NEXT: movl 60(%esp,%eax), %edx +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 48(%esp,%eax), %edi +; X86-NEXT: movl 52(%esp,%eax), %ebx +; X86-NEXT: shldl %cl, %ebx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edi, %ebx +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl 8(%eax), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %eax +; X86-NEXT: movl (%ecx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: andl %edi, %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl 12(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ebx, %eax +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: movl %esi, 12(%eax) +; X86-NEXT: movl %edi, (%eax) +; X86-NEXT: movl %ebx, 4(%eax) +; X86-NEXT: setne %al +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; -; X64-LABEL: set_ne_i128: -; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: andl $96, %ecx -; X64-NEXT: shrl $3, %ecx -; X64-NEXT: movl (%rdi,%rcx), %edx -; X64-NEXT: btl %esi, %edx -; X64-NEXT: setb %al -; X64-NEXT: btsl %esi, %edx -; X64-NEXT: movl %edx, (%rdi,%rcx) -; X64-NEXT: retq +; SSE-LABEL: set_ne_i128: +; SSE: # %bb.0: +; SSE-NEXT: movl %esi, %ecx +; SSE-NEXT: movl $1, %edx +; SSE-NEXT: xorl %esi, %esi +; SSE-NEXT: shldq %cl, %rdx, %rsi +; SSE-NEXT: shlq %cl, %rdx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: testb $64, %cl +; SSE-NEXT: cmovneq %rdx, %rsi +; SSE-NEXT: cmovneq %rax, %rdx +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: movq %rcx, %r8 +; SSE-NEXT: andq %rsi, %r8 +; SSE-NEXT: movq %rax, %r9 +; SSE-NEXT: andq %rdx, %r9 +; SSE-NEXT: orq %rcx, %rsi +; SSE-NEXT: orq %rax, %rdx +; SSE-NEXT: orq %r8, %r9 +; SSE-NEXT: setne %al +; SSE-NEXT: movq %rdx, (%rdi) +; SSE-NEXT: movq %rsi, 8(%rdi) +; SSE-NEXT: retq +; +; AVX-LABEL: set_ne_i128: +; AVX: # %bb.0: +; AVX-NEXT: movl %esi, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: movl $1, %edx +; AVX-NEXT: xorl %esi, %esi +; AVX-NEXT: shldq %cl, %rdx, %rsi +; AVX-NEXT: shlxq %rcx, %rdx, %rdx +; AVX-NEXT: testb $64, %cl +; AVX-NEXT: cmovneq %rdx, %rsi +; AVX-NEXT: cmovneq %rax, %rdx +; AVX-NEXT: movq (%rdi), %rax +; AVX-NEXT: movq 8(%rdi), %rcx +; AVX-NEXT: movq %rcx, %r8 +; AVX-NEXT: andq %rsi, %r8 +; AVX-NEXT: movq %rax, %r9 +; AVX-NEXT: andq %rdx, %r9 +; AVX-NEXT: orq %rcx, %rsi +; AVX-NEXT: orq %rax, %rdx +; AVX-NEXT: orq %r8, %r9 +; AVX-NEXT: setne %al +; AVX-NEXT: movq %rdx, (%rdi) +; AVX-NEXT: movq %rsi, 8(%rdi) +; AVX-NEXT: retq %rem = and i32 
%position, 127 %ofs = zext nneg i32 %rem to i128 %bit = shl nuw i128 1, %ofs @@ -606,9 +1026,9 @@ define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $96, %esp -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movzbl 16(%ebp), %ebx +; X86-NEXT: subl $128, %esp +; X86-NEXT: movzbl 12(%ebp), %ecx +; X86-NEXT: movzbl 16(%ebp), %eax ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -617,29 +1037,25 @@ define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind { ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: shrb $3, %al -; X86-NEXT: andb $12, %al -; X86-NEXT: negb %al -; X86-NEXT: movsbl %al, %edi -; X86-NEXT: movl 72(%esp,%edi), %edx -; X86-NEXT: movl 76(%esp,%edi), %esi -; X86-NEXT: movzbl %bl, %eax -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 64(%esp,%edi), %ebx -; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X86-NEXT: movl 68(%esp,%edi), %ebx -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: shldl %cl, %edx, %esi -; X86-NEXT: shldl %cl, %ebx, %edx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: shrb $3, %dl +; X86-NEXT: andb $12, %dl +; X86-NEXT: negb %dl +; X86-NEXT: movsbl %dl, %esi +; X86-NEXT: movl 64(%esp,%esi), %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl (%esp), %eax # 4-byte Reload -; X86-NEXT: shldl %cl, %eax, %ebx -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shll %cl, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: notl %esi +; X86-NEXT: movl 68(%esp,%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 72(%esp,%esi), %ebx ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: movl 76(%esp,%esi), %edi +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: shldl %cl, %ebx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shll %cl, %edx ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -647,53 +1063,72 @@ define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind { ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%esi), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ebx, %eax +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl (%esi), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ebx, %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl 40(%esp,%eax), %edi -; X86-NEXT: movl 44(%esp,%eax), %esi -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %edi, %esi ; X86-NEXT: movl 8(%ebp), %ecx -; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: andl 12(%ecx), %eax -; X86-NEXT: orl %esi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 12(%ecx), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ebx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl %eax, %edx +; X86-NEXT: movl 4(%ecx), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ecx, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: notl %ecx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl 100(%esp,%ecx), %edi +; X86-NEXT: movl 104(%esp,%ecx), %ecx +; X86-NEXT: movl %ecx, %ebx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movzbl 12(%ebp), %ecx +; X86-NEXT: shldl %cl, %edi, %ebx +; X86-NEXT: orl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: notl %esi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl 108(%esp,%ebx), %ebx +; X86-NEXT: shldl %cl, %eax, %ebx +; X86-NEXT: orl %ebx, %esi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: notl %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl 36(%esp,%esi), %esi -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: shldl %cl, %esi, %edi -; X86-NEXT: movl 8(%ebp), %edx -; X86-NEXT: andl 8(%edx), %eax -; X86-NEXT: orl %edi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: notl %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl 32(%esp,%eax), %eax -; X86-NEXT: shldl %cl, %eax, %esi -; X86-NEXT: movl 8(%ebp), %edi -; X86-NEXT: andl 4(%edi), %ebx -; X86-NEXT: orl %esi, %ebx -; X86-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl 96(%esp,%ebx), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: orl %ebx, %eax ; X86-NEXT: notl %edx -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shll %cl, %eax -; X86-NEXT: andl (%edi), %edx -; X86-NEXT: orl %eax, %edx -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: andl $96, %eax -; X86-NEXT: shrl $3, %eax -; X86-NEXT: movl (%edi,%eax), %eax -; X86-NEXT: btl %ecx, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 12(%edi) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 8(%edi) -; X86-NEXT: movl %ebx, 4(%edi) -; X86-NEXT: movl %edx, (%edi) -; X86-NEXT: setae %al +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl %cl, %ebx, %edi +; X86-NEXT: orl %edi, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl 8(%ebp), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 8(%ecx) +; X86-NEXT: movl %esi, 12(%ecx) +; X86-NEXT: movl %eax, 
(%ecx) +; X86-NEXT: movl %edx, 4(%ecx) +; X86-NEXT: sete %al ; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -716,84 +1151,86 @@ define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind { ; SSE-NEXT: testb $64, %cl ; SSE-NEXT: cmovneq %rsi, %r8 ; SSE-NEXT: cmovneq %r9, %rsi -; SSE-NEXT: notq %r8 ; SSE-NEXT: cmovneq %rax, %rdx ; SSE-NEXT: cmovneq %r9, %rax +; SSE-NEXT: movq (%rdi), %rcx +; SSE-NEXT: movq 8(%rdi), %r9 +; SSE-NEXT: movq %r9, %r10 +; SSE-NEXT: andq %r8, %r10 +; SSE-NEXT: notq %r8 +; SSE-NEXT: movq %rcx, %r11 +; SSE-NEXT: andq %rsi, %r11 ; SSE-NEXT: notq %rsi -; SSE-NEXT: andq 8(%rdi), %r8 +; SSE-NEXT: andq %r9, %r8 ; SSE-NEXT: orq %rdx, %r8 -; SSE-NEXT: andq (%rdi), %rsi +; SSE-NEXT: andq %rcx, %rsi ; SSE-NEXT: orq %rax, %rsi -; SSE-NEXT: movl %ecx, %eax -; SSE-NEXT: andl $96, %eax -; SSE-NEXT: shrl $3, %eax -; SSE-NEXT: movl (%rdi,%rax), %eax -; SSE-NEXT: btl %ecx, %eax -; SSE-NEXT: setae %al -; SSE-NEXT: movq %r8, 8(%rdi) +; SSE-NEXT: orq %r10, %r11 +; SSE-NEXT: sete %al ; SSE-NEXT: movq %rsi, (%rdi) +; SSE-NEXT: movq %r8, 8(%rdi) ; SSE-NEXT: retq ; ; AVX2-LABEL: init_eq_i128: ; AVX2: # %bb.0: ; AVX2-NEXT: movl %esi, %ecx -; AVX2-NEXT: movl $1, %eax -; AVX2-NEXT: xorl %esi, %esi -; AVX2-NEXT: shldq %cl, %rax, %rsi -; AVX2-NEXT: movl %edx, %edx +; AVX2-NEXT: movl $1, %esi +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: shldq %cl, %rsi, %rax ; AVX2-NEXT: xorl %r8d, %r8d -; AVX2-NEXT: shldq %cl, %rdx, %r8 +; AVX2-NEXT: movl %edx, %edx ; AVX2-NEXT: xorl %r9d, %r9d -; AVX2-NEXT: shlxq %rcx, %rax, %rax +; AVX2-NEXT: shldq %cl, %rdx, %r9 +; AVX2-NEXT: shlxq %rcx, %rsi, %rsi ; AVX2-NEXT: testb $64, %cl -; AVX2-NEXT: cmovneq %rax, %rsi -; AVX2-NEXT: cmovneq %r9, %rax -; AVX2-NEXT: shlxq %rcx, %rdx, %rdx -; AVX2-NEXT: cmovneq %rdx, %r8 -; AVX2-NEXT: cmovneq %r9, %rdx -; AVX2-NEXT: andnq 8(%rdi), %rsi, %rsi -; AVX2-NEXT: orq %r8, %rsi -; AVX2-NEXT: andnq (%rdi), %rax, %r8 -; AVX2-NEXT: orq %rdx, %r8 -; AVX2-NEXT: movl %ecx, %eax -; AVX2-NEXT: andl $96, %eax -; AVX2-NEXT: shrl $3, %eax -; AVX2-NEXT: movl (%rdi,%rax), %eax -; AVX2-NEXT: btl %ecx, %eax -; AVX2-NEXT: setae %al -; AVX2-NEXT: movq %rsi, 8(%rdi) -; AVX2-NEXT: movq %r8, (%rdi) +; AVX2-NEXT: cmovneq %rsi, %rax +; AVX2-NEXT: cmovneq %r8, %rsi +; AVX2-NEXT: shlxq %rcx, %rdx, %rcx +; AVX2-NEXT: cmovneq %rcx, %r9 +; AVX2-NEXT: cmovneq %r8, %rcx +; AVX2-NEXT: movq (%rdi), %rdx +; AVX2-NEXT: movq 8(%rdi), %r8 +; AVX2-NEXT: andnq %r8, %rax, %r10 +; AVX2-NEXT: andq %rax, %r8 +; AVX2-NEXT: andnq %rdx, %rsi, %r11 +; AVX2-NEXT: andq %rsi, %rdx +; AVX2-NEXT: orq %r9, %r10 +; AVX2-NEXT: orq %rcx, %r11 +; AVX2-NEXT: orq %r8, %rdx +; AVX2-NEXT: sete %al +; AVX2-NEXT: movq %r11, (%rdi) +; AVX2-NEXT: movq %r10, 8(%rdi) ; AVX2-NEXT: retq ; ; AVX512-LABEL: init_eq_i128: ; AVX512: # %bb.0: ; AVX512-NEXT: movl %esi, %ecx -; AVX512-NEXT: movl $1, %eax -; AVX512-NEXT: xorl %esi, %esi -; AVX512-NEXT: shldq %cl, %rax, %rsi +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: movl $1, %esi ; AVX512-NEXT: xorl %r8d, %r8d -; AVX512-NEXT: shlxq %rcx, %rax, %rax +; AVX512-NEXT: shldq %cl, %rsi, %r8 +; AVX512-NEXT: shlxq %rcx, %rsi, %rsi ; AVX512-NEXT: movl %edx, %edx ; AVX512-NEXT: xorl %r9d, %r9d ; AVX512-NEXT: shldq %cl, %rdx, %r9 ; AVX512-NEXT: testb $64, %cl +; AVX512-NEXT: cmovneq %rsi, %r8 ; AVX512-NEXT: cmovneq %rax, %rsi -; AVX512-NEXT: cmovneq %r8, %rax -; AVX512-NEXT: shlxq %rcx, %rdx, %rdx -; AVX512-NEXT: cmovneq %rdx, %r9 -; AVX512-NEXT: cmovneq %r8, %rdx -; AVX512-NEXT: andnq 8(%rdi), %rsi, %rsi -; 
AVX512-NEXT: orq %r9, %rsi -; AVX512-NEXT: andnq (%rdi), %rax, %r8 -; AVX512-NEXT: orq %rdx, %r8 -; AVX512-NEXT: movl %ecx, %eax -; AVX512-NEXT: andl $96, %eax -; AVX512-NEXT: shrl $3, %eax -; AVX512-NEXT: movl (%rdi,%rax), %eax -; AVX512-NEXT: btl %ecx, %eax -; AVX512-NEXT: setae %al -; AVX512-NEXT: movq %rsi, 8(%rdi) +; AVX512-NEXT: shlxq %rcx, %rdx, %rcx +; AVX512-NEXT: cmovneq %rcx, %r9 +; AVX512-NEXT: cmovneq %rax, %rcx +; AVX512-NEXT: movq (%rdi), %rax +; AVX512-NEXT: movq 8(%rdi), %rdx +; AVX512-NEXT: andnq %rdx, %r8, %r10 +; AVX512-NEXT: andq %r8, %rdx +; AVX512-NEXT: andnq %rax, %rsi, %r8 +; AVX512-NEXT: andq %rsi, %rax +; AVX512-NEXT: orq %r9, %r10 +; AVX512-NEXT: orq %rcx, %r8 +; AVX512-NEXT: orq %rdx, %rax +; AVX512-NEXT: sete %al ; AVX512-NEXT: movq %r8, (%rdi) +; AVX512-NEXT: movq %r10, 8(%rdi) ; AVX512-NEXT: retq %rem = and i32 %position, 127 %ofs = zext nneg i32 %rem to i128 @@ -815,25 +1252,344 @@ define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind { define i1 @test_ne_i512(ptr %word, i32 %position) nounwind { ; X86-LABEL: test_ne_i512: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: shrl $3, %edx -; X86-NEXT: andl $60, %edx -; X86-NEXT: movl (%eax,%edx), %eax -; X86-NEXT: btl %ecx, %eax -; X86-NEXT: setb %al +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $224, %esp +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrl $3, %eax +; X86-NEXT: andl $60, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: leal {{[0-9]+}}(%esp), %edx +; X86-NEXT: subl %eax, %edx +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl 24(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 28(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl $31, %ecx +; X86-NEXT: shldl %cl, %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 56(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 60(%edx), %eax +; X86-NEXT: shldl %cl, %esi, %eax +; X86-NEXT: movl 
%eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 12(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 40(%edx), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 16(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 20(%edx), %edi +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: shldl %cl, %eax, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 32(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 36(%edx), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %esi, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl 52(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl 4(%edx), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: shldl %cl, %edi, %eax +; X86-NEXT: movl 8(%ebp), %ebx +; X86-NEXT: andl 40(%ebx), %eax +; X86-NEXT: andl 8(%ebx), %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 56(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: andl 24(%ebx), %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: andl 44(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl 12(%ebx), %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl 60(%edi), %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 28(%edi), %eax +; X86-NEXT: orl %esi, %eax +; X86-NEXT: orl %ebx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl (%edx), %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl %cl, %edx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: negl %edx +; X86-NEXT: movl 192(%esp,%edx), %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: shldl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: 
shldl %cl, %ebx, %edx +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl 8(%ebp), %ebx +; X86-NEXT: andl 32(%ebx), %ecx +; X86-NEXT: andl (%ebx), %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: andl 16(%ebx), %edi +; X86-NEXT: andl 48(%ebx), %edx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: orl %esi, %edx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 36(%ebx), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl 4(%ebx), %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 20(%ebx), %ecx +; X86-NEXT: andl 52(%ebx), %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: orl %esi, %eax +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: orl %edx, %eax +; X86-NEXT: setne %al +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; -; X64-LABEL: test_ne_i512: -; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: shrl $3, %eax -; X64-NEXT: andl $60, %eax -; X64-NEXT: movl (%rdi,%rax), %eax -; X64-NEXT: btl %esi, %eax -; X64-NEXT: setb %al -; X64-NEXT: retq +; SSE-LABEL: test_ne_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movq $1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movl %esi, %ecx +; SSE-NEXT: andl $63, %ecx +; SSE-NEXT: shrl $3, %esi +; SSE-NEXT: andl $56, %esi +; SSE-NEXT: negl %esi +; SSE-NEXT: movslq %esi, %rbx +; SSE-NEXT: movq -48(%rsp,%rbx), %rdx +; SSE-NEXT: movq -40(%rsp,%rbx), %r14 +; SSE-NEXT: movq %r14, %rax +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq -16(%rsp,%rbx), %r11 +; SSE-NEXT: movq -8(%rsp,%rbx), %r10 +; SSE-NEXT: shldq %cl, %r11, %r10 +; SSE-NEXT: movq -32(%rsp,%rbx), %r9 +; SSE-NEXT: movq -24(%rsp,%rbx), %r15 +; SSE-NEXT: movq %r15, %r8 +; SSE-NEXT: shldq %cl, %r9, %r8 +; SSE-NEXT: movq -56(%rsp,%rbx), %rsi +; SSE-NEXT: shldq %cl, %rsi, %rdx +; SSE-NEXT: shldq %cl, %r15, %r11 +; SSE-NEXT: shldq %cl, %r14, %r9 +; SSE-NEXT: movq -64(%rsp,%rbx), %rbx +; SSE-NEXT: shldq %cl, %rbx, %rsi +; SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; SSE-NEXT: shlq %cl, %rbx +; SSE-NEXT: andq 32(%rdi), %r9 +; SSE-NEXT: andq 48(%rdi), %r11 +; SSE-NEXT: andq 16(%rdi), %rdx +; SSE-NEXT: orq %r11, %rdx +; SSE-NEXT: andq 40(%rdi), %r8 +; SSE-NEXT: andq 56(%rdi), %r10 +; SSE-NEXT: andq 24(%rdi), %rax +; SSE-NEXT: orq %r10, %rax +; SSE-NEXT: andq (%rdi), %rbx +; SSE-NEXT: orq %r9, %rbx +; SSE-NEXT: orq %rdx, %rbx +; SSE-NEXT: andq 8(%rdi), %rsi +; SSE-NEXT: orq %r8, %rsi +; SSE-NEXT: orq %rax, %rsi +; SSE-NEXT: orq %rbx, %rsi +; SSE-NEXT: setne %al +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ne_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vmovups %ymm0, 
-{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0] +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movl %esi, %ecx +; AVX2-NEXT: andl $63, %ecx +; AVX2-NEXT: shrl $3, %esi +; AVX2-NEXT: andl $56, %esi +; AVX2-NEXT: negl %esi +; AVX2-NEXT: movslq %esi, %rsi +; AVX2-NEXT: movq -48(%rsp,%rsi), %rdx +; AVX2-NEXT: movq -40(%rsp,%rsi), %rbx +; AVX2-NEXT: movq %rbx, %rax +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq -16(%rsp,%rsi), %r11 +; AVX2-NEXT: movq -8(%rsp,%rsi), %r10 +; AVX2-NEXT: shldq %cl, %r11, %r10 +; AVX2-NEXT: movq -32(%rsp,%rsi), %r9 +; AVX2-NEXT: movq -24(%rsp,%rsi), %r14 +; AVX2-NEXT: movq %r14, %r8 +; AVX2-NEXT: shldq %cl, %r9, %r8 +; AVX2-NEXT: movq -64(%rsp,%rsi), %r15 +; AVX2-NEXT: movq -56(%rsp,%rsi), %rsi +; AVX2-NEXT: shldq %cl, %rsi, %rdx +; AVX2-NEXT: shldq %cl, %r14, %r11 +; AVX2-NEXT: shldq %cl, %rbx, %r9 +; AVX2-NEXT: shldq %cl, %r15, %rsi +; AVX2-NEXT: shlxq %rcx, %r15, %rcx +; AVX2-NEXT: andq 32(%rdi), %r9 +; AVX2-NEXT: andq 48(%rdi), %r11 +; AVX2-NEXT: andq 16(%rdi), %rdx +; AVX2-NEXT: andq 40(%rdi), %r8 +; AVX2-NEXT: andq 56(%rdi), %r10 +; AVX2-NEXT: andq 24(%rdi), %rax +; AVX2-NEXT: orq %r11, %rdx +; AVX2-NEXT: orq %r10, %rax +; AVX2-NEXT: andq (%rdi), %rcx +; AVX2-NEXT: orq %r9, %rcx +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: andq 8(%rdi), %rsi +; AVX2-NEXT: orq %r8, %rsi +; AVX2-NEXT: orq %rax, %rsi +; AVX2-NEXT: orq %rcx, %rsi +; AVX2-NEXT: setne %al +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ne_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0] +; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: movl %esi, %ecx +; AVX512-NEXT: andl $63, %ecx +; AVX512-NEXT: shrl $3, %esi +; AVX512-NEXT: andl $56, %esi +; AVX512-NEXT: negl %esi +; AVX512-NEXT: movslq %esi, %rbx +; AVX512-NEXT: movq -48(%rsp,%rbx), %rdx +; AVX512-NEXT: movq -40(%rsp,%rbx), %r14 +; AVX512-NEXT: movq %r14, %rax +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq -16(%rsp,%rbx), %r11 +; AVX512-NEXT: movq -8(%rsp,%rbx), %r10 +; AVX512-NEXT: shldq %cl, %r11, %r10 +; AVX512-NEXT: movq -32(%rsp,%rbx), %r9 +; AVX512-NEXT: movq -24(%rsp,%rbx), %r15 +; AVX512-NEXT: movq %r15, %r8 +; AVX512-NEXT: shldq %cl, %r9, %r8 +; AVX512-NEXT: movq -56(%rsp,%rbx), %rsi +; AVX512-NEXT: shldq %cl, %rsi, %rdx +; AVX512-NEXT: shldq %cl, %r15, %r11 +; AVX512-NEXT: shldq %cl, %r14, %r9 +; AVX512-NEXT: movq -64(%rsp,%rbx), %rbx +; AVX512-NEXT: shldq %cl, %rbx, %rsi +; AVX512-NEXT: shlxq %rcx, %rbx, %rcx +; AVX512-NEXT: andq 32(%rdi), %r9 +; AVX512-NEXT: andq 48(%rdi), %r11 +; AVX512-NEXT: andq 16(%rdi), %rdx +; AVX512-NEXT: andq 40(%rdi), %r8 +; AVX512-NEXT: andq 56(%rdi), %r10 +; AVX512-NEXT: andq 24(%rdi), %rax +; AVX512-NEXT: orq %r11, %rdx +; AVX512-NEXT: orq %r10, %rax +; AVX512-NEXT: andq (%rdi), %rcx +; AVX512-NEXT: orq %r9, %rcx +; AVX512-NEXT: orq %rdx, %rcx +; AVX512-NEXT: andq 8(%rdi), %rsi +; AVX512-NEXT: orq %r8, %rsi +; AVX512-NEXT: orq %rax, %rsi +; AVX512-NEXT: orq %rcx, %rsi +; AVX512-NEXT: setne %al +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq 
%r15 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %rem = and i32 %position, 511 %ofs = zext nneg i32 %rem to i512 %bit = shl nuw i512 1, %ofs @@ -846,33 +1602,572 @@ define i1 @test_ne_i512(ptr %word, i32 %position) nounwind { define i1 @complement_ne_i512(ptr %word, i32 %position) nounwind { ; X86-LABEL: complement_ne_i512: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: shrl $3, %esi -; X86-NEXT: andl $60, %esi -; X86-NEXT: movl (%ecx,%esi), %edi -; X86-NEXT: btl %edx, %edi -; X86-NEXT: setb %al -; X86-NEXT: btcl %edx, %edi -; X86-NEXT: movl %edi, (%ecx,%esi) +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $272, %esp # imm = 0x110 +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrl $3, %eax +; X86-NEXT: andl $60, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: leal {{[0-9]+}}(%esp), %edx +; X86-NEXT: subl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl 24(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 28(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl $31, %ecx +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 56(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 60(%edx), %esi +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 12(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 40(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: 
movl 16(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 20(%edx), %ebx +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 32(%edx), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 36(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl 52(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: shldl %cl, %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 4(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%ebp), %edx +; X86-NEXT: movl 40(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %eax +; X86-NEXT: movl 8(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ebx, %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl 56(%edx), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edi, %ebx +; X86-NEXT: movl 24(%edx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: orl %esi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%eax), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl 12(%eax), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: orl %esi, %ebx +; X86-NEXT: movl 60(%eax), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl %edx, %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl 28(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: negl %eax +; X86-NEXT: movl 240(%esp,%eax), %esi +; X86-NEXT: shldl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl 32(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edi, %eax +; X86-NEXT: movl (%esi), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edx, %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl 16(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ebx, %eax +; X86-NEXT: movl 48(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl (%esp), %edx # 4-byte Folded Reload +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl 36(%esi), %ebx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl 4(%esi), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl 20(%esi), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl %esi, %edi +; X86-NEXT: movl 52(%eax), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: orl %edi, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: xorl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; 
X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl %ecx, (%esp) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl 8(%ebp), %edx +; X86-NEXT: movl %ebx, 60(%edx) +; X86-NEXT: movl %edi, 56(%edx) +; X86-NEXT: movl %ecx, 52(%edx) +; X86-NEXT: movl %esi, 44(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 40(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 36(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 32(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 28(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 24(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 20(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 16(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 12(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 8(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 4(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, (%edx) +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 48(%edx) +; X86-NEXT: setne %al +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; -; X64-LABEL: complement_ne_i512: -; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shrl $3, %ecx -; X64-NEXT: andl $60, %ecx -; X64-NEXT: movl (%rdi,%rcx), %edx -; X64-NEXT: btl %esi, %edx -; X64-NEXT: setb %al -; X64-NEXT: btcl %esi, %edx -; X64-NEXT: movl %edx, (%rdi,%rcx) -; X64-NEXT: retq +; SSE-LABEL: complement_ne_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: subq $56, %rsp +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movq $0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movq $1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movl %esi, %ecx +; SSE-NEXT: andl $63, %ecx +; SSE-NEXT: shrl $3, %esi +; SSE-NEXT: andl $56, %esi +; SSE-NEXT: negl %esi +; SSE-NEXT: movslq %esi, %rbx +; SSE-NEXT: movq (%rsp,%rbx), %rsi +; SSE-NEXT: movq 8(%rsp,%rbx), %r14 +; SSE-NEXT: movq %r14, %rax +; SSE-NEXT: shldq %cl, 
%rsi, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 32(%rsp,%rbx), %r8 +; SSE-NEXT: movq 40(%rsp,%rbx), %rbp +; SSE-NEXT: shldq %cl, %r8, %rbp +; SSE-NEXT: movq 16(%rsp,%rbx), %r9 +; SSE-NEXT: movq 24(%rsp,%rbx), %r15 +; SSE-NEXT: movq %r15, %r10 +; SSE-NEXT: shldq %cl, %r9, %r10 +; SSE-NEXT: movq -8(%rsp,%rbx), %r11 +; SSE-NEXT: shldq %cl, %r11, %rsi +; SSE-NEXT: shldq %cl, %r15, %r8 +; SSE-NEXT: shldq %cl, %r14, %r9 +; SSE-NEXT: movq -16(%rsp,%rbx), %rbx +; SSE-NEXT: shldq %cl, %rbx, %r11 +; SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; SSE-NEXT: shlq %cl, %rbx +; SSE-NEXT: movq 24(%rdi), %r15 +; SSE-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 56(%rdi), %rcx +; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 16(%rdi), %r12 +; SSE-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 48(%rdi), %r13 +; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %r8, %r13 +; SSE-NEXT: andq %rsi, %r12 +; SSE-NEXT: orq %r13, %r12 +; SSE-NEXT: movq %rcx, %r13 +; SSE-NEXT: andq %rbp, %r13 +; SSE-NEXT: andq %rax, %r15 +; SSE-NEXT: orq %r13, %r15 +; SSE-NEXT: movq 32(%rdi), %r14 +; SSE-NEXT: movq %r14, %rcx +; SSE-NEXT: andq %r9, %rcx +; SSE-NEXT: movq (%rdi), %r13 +; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %rbx, %r13 +; SSE-NEXT: orq %rcx, %r13 +; SSE-NEXT: orq %r12, %r13 +; SSE-NEXT: movq 40(%rdi), %rcx +; SSE-NEXT: movq %rcx, %r12 +; SSE-NEXT: andq %r10, %r12 +; SSE-NEXT: movq 8(%rdi), %rdx +; SSE-NEXT: movq %rdx, %rax +; SSE-NEXT: andq %r11, %rax +; SSE-NEXT: orq %r12, %rax +; SSE-NEXT: orq %r15, %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; SSE-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; SSE-NEXT: xorq %rcx, %r10 +; SSE-NEXT: xorq %r14, %r9 +; SSE-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload +; SSE-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; SSE-NEXT: xorq %rdx, %r11 +; SSE-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; SSE-NEXT: orq %r13, %rax +; SSE-NEXT: movq %r8, 48(%rdi) +; SSE-NEXT: movq %rbp, 56(%rdi) +; SSE-NEXT: movq %r9, 32(%rdi) +; SSE-NEXT: movq %r10, 40(%rdi) +; SSE-NEXT: movq %rsi, 16(%rdi) +; SSE-NEXT: movq %r15, 24(%rdi) +; SSE-NEXT: movq %rbx, (%rdi) +; SSE-NEXT: movq %r11, 8(%rdi) +; SSE-NEXT: setne %al +; SSE-NEXT: addq $56, %rsp +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: complement_ne_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: subq $72, %rsp +; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0] +; AVX2-NEXT: vmovups %ymm0, (%rsp) +; AVX2-NEXT: movl %esi, %ecx +; AVX2-NEXT: andl $63, %ecx +; AVX2-NEXT: shrl $3, %esi +; AVX2-NEXT: andl $56, %esi +; AVX2-NEXT: negl %esi +; AVX2-NEXT: movslq %esi, %rbx +; AVX2-NEXT: movq 16(%rsp,%rbx), %rsi +; AVX2-NEXT: movq 24(%rsp,%rbx), %rbp +; AVX2-NEXT: movq %rbp, %rax +; AVX2-NEXT: shldq %cl, %rsi, %rax +; AVX2-NEXT: movq 
%rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 48(%rsp,%rbx), %r8 +; AVX2-NEXT: movq 56(%rsp,%rbx), %r13 +; AVX2-NEXT: shldq %cl, %r8, %r13 +; AVX2-NEXT: movq 32(%rsp,%rbx), %r9 +; AVX2-NEXT: movq 40(%rsp,%rbx), %r14 +; AVX2-NEXT: movq %r14, %r10 +; AVX2-NEXT: shldq %cl, %r9, %r10 +; AVX2-NEXT: movq 8(%rsp,%rbx), %r11 +; AVX2-NEXT: shldq %cl, %r11, %rsi +; AVX2-NEXT: shldq %cl, %r14, %r8 +; AVX2-NEXT: movq 16(%rdi), %r12 +; AVX2-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 48(%rdi), %r14 +; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: andq %r8, %r14 +; AVX2-NEXT: andq %rsi, %r12 +; AVX2-NEXT: orq %r14, %r12 +; AVX2-NEXT: movq 56(%rdi), %r15 +; AVX2-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: andq %r13, %r15 +; AVX2-NEXT: movq 24(%rdi), %r14 +; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: andq %rax, %r14 +; AVX2-NEXT: orq %r15, %r14 +; AVX2-NEXT: shldq %cl, %rbp, %r9 +; AVX2-NEXT: movq (%rsp,%rbx), %rdx +; AVX2-NEXT: movq 32(%rdi), %r15 +; AVX2-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: andq %r9, %r15 +; AVX2-NEXT: shlxq %rcx, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq (%rdi), %rbx +; AVX2-NEXT: movq %rbx, %rbp +; AVX2-NEXT: andq %rax, %rbp +; AVX2-NEXT: orq %r15, %rbp +; AVX2-NEXT: orq %r12, %rbp +; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx +; AVX2-NEXT: shldq %cl, %rdx, %r11 +; AVX2-NEXT: movq 40(%rdi), %rax +; AVX2-NEXT: movq %rax, %rcx +; AVX2-NEXT: andq %r10, %rcx +; AVX2-NEXT: movq 8(%rdi), %r15 +; AVX2-NEXT: movq %r15, %r12 +; AVX2-NEXT: andq %r11, %r12 +; AVX2-NEXT: orq %rcx, %r12 +; AVX2-NEXT: orq %r14, %r12 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX2-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; AVX2-NEXT: xorq %rax, %r10 +; AVX2-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; AVX2-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; AVX2-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; AVX2-NEXT: xorq %r15, %r11 +; AVX2-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; AVX2-NEXT: orq %rbp, %r12 +; AVX2-NEXT: movq %r8, 48(%rdi) +; AVX2-NEXT: movq %r13, 56(%rdi) +; AVX2-NEXT: movq %r9, 32(%rdi) +; AVX2-NEXT: movq %r10, 40(%rdi) +; AVX2-NEXT: movq %rsi, 16(%rdi) +; AVX2-NEXT: movq %rcx, 24(%rdi) +; AVX2-NEXT: movq %rbx, (%rdi) +; AVX2-NEXT: movq %r11, 8(%rdi) +; AVX2-NEXT: setne %al +; AVX2-NEXT: addq $72, %rsp +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: complement_ne_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: subq $72, %rsp +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0] +; AVX512-NEXT: vmovups %ymm0, (%rsp) +; AVX512-NEXT: movl %esi, %ecx +; AVX512-NEXT: andl $63, %ecx +; AVX512-NEXT: shrl $3, %esi +; AVX512-NEXT: andl $56, %esi +; AVX512-NEXT: negl %esi +; 
AVX512-NEXT: movslq %esi, %rbx +; AVX512-NEXT: movq 16(%rsp,%rbx), %rsi +; AVX512-NEXT: movq 24(%rsp,%rbx), %rbp +; AVX512-NEXT: movq %rbp, %rax +; AVX512-NEXT: shldq %cl, %rsi, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 48(%rsp,%rbx), %r8 +; AVX512-NEXT: movq 56(%rsp,%rbx), %r13 +; AVX512-NEXT: shldq %cl, %r8, %r13 +; AVX512-NEXT: movq 32(%rsp,%rbx), %r9 +; AVX512-NEXT: movq 40(%rsp,%rbx), %r14 +; AVX512-NEXT: movq %r14, %r10 +; AVX512-NEXT: shldq %cl, %r9, %r10 +; AVX512-NEXT: movq 8(%rsp,%rbx), %r11 +; AVX512-NEXT: shldq %cl, %r11, %rsi +; AVX512-NEXT: shldq %cl, %r14, %r8 +; AVX512-NEXT: movq 16(%rdi), %r12 +; AVX512-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 48(%rdi), %r14 +; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: andq %r8, %r14 +; AVX512-NEXT: andq %rsi, %r12 +; AVX512-NEXT: orq %r14, %r12 +; AVX512-NEXT: movq 56(%rdi), %r15 +; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: andq %r13, %r15 +; AVX512-NEXT: movq 24(%rdi), %r14 +; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: andq %rax, %r14 +; AVX512-NEXT: orq %r15, %r14 +; AVX512-NEXT: shldq %cl, %rbp, %r9 +; AVX512-NEXT: movq (%rsp,%rbx), %rdx +; AVX512-NEXT: movq 32(%rdi), %r15 +; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: andq %r9, %r15 +; AVX512-NEXT: shlxq %rcx, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq (%rdi), %rbx +; AVX512-NEXT: movq %rbx, %rbp +; AVX512-NEXT: andq %rax, %rbp +; AVX512-NEXT: orq %r15, %rbp +; AVX512-NEXT: orq %r12, %rbp +; AVX512-NEXT: # kill: def $cl killed $cl killed $rcx +; AVX512-NEXT: shldq %cl, %rdx, %r11 +; AVX512-NEXT: movq 40(%rdi), %rax +; AVX512-NEXT: movq %rax, %rcx +; AVX512-NEXT: andq %r10, %rcx +; AVX512-NEXT: movq 8(%rdi), %r15 +; AVX512-NEXT: movq %r15, %r12 +; AVX512-NEXT: andq %r11, %r12 +; AVX512-NEXT: orq %rcx, %r12 +; AVX512-NEXT: orq %r14, %r12 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX512-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX512-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; AVX512-NEXT: xorq %rax, %r10 +; AVX512-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; AVX512-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; AVX512-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; AVX512-NEXT: xorq %r15, %r11 +; AVX512-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; AVX512-NEXT: orq %rbp, %r12 +; AVX512-NEXT: movq %r8, 48(%rdi) +; AVX512-NEXT: movq %r13, 56(%rdi) +; AVX512-NEXT: movq %r9, 32(%rdi) +; AVX512-NEXT: movq %r10, 40(%rdi) +; AVX512-NEXT: movq %rsi, 16(%rdi) +; AVX512-NEXT: movq %rcx, 24(%rdi) +; AVX512-NEXT: movq %rbx, (%rdi) +; AVX512-NEXT: movq %r11, 8(%rdi) +; AVX512-NEXT: setne %al +; AVX512-NEXT: addq $72, %rsp +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %rem = and i32 %position, 511 %ofs = zext nneg i32 %rem to i512 %bit = shl nuw i512 1, %ofs @@ -887,33 +2182,606 @@ define i1 @complement_ne_i512(ptr %word, i32 %position) nounwind { define i1 @reset_eq_i512(ptr %word, i32 %position) nounwind { ; X86-LABEL: reset_eq_i512: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl 
%esp, %ebp +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $288, %esp # imm = 0x120 +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrl $3, %eax +; X86-NEXT: andl $60, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: leal {{[0-9]+}}(%esp), %edi +; X86-NEXT: subl %eax, %edi +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl 4(%edi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%edi), %eax +; X86-NEXT: andl $31, %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: shldl %cl, %edx, %ebx +; X86-NEXT: movl 12(%edi), %edx ; X86-NEXT: movl %edx, %esi -; X86-NEXT: shrl $3, %esi -; X86-NEXT: andl $60, %esi -; X86-NEXT: movl (%ecx,%esi), %edi -; X86-NEXT: btl %edx, %edi -; X86-NEXT: setae %al -; X86-NEXT: btrl %edx, %edi -; X86-NEXT: movl %edi, (%ecx,%esi) +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 16(%edi), %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: shldl %cl, %edx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 20(%edi), %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 24(%edi), %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: shldl %cl, %edx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 28(%edi), %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 32(%edi), %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: shldl %cl, %edx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 36(%edi), %esi +; X86-NEXT: movl %esi, %edx +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 40(%edi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 40(%esi), %eax +; 
X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %eax, %edx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %eax, %ebx +; X86-NEXT: orl %edx, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%edi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: movl 52(%edi), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 56(%edi), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shldl %cl, %esi, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl 56(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %eax, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 24(%esi), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: orl %ebx, %eax +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: movl 44(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %eax, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 12(%esi), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 60(%edi), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: shldl %cl, %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 60(%ebx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 28(%ebx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edx, %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl (%edi), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: negl %eax +; X86-NEXT: movl 256(%esp,%eax), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: shldl %cl, %edi, %eax +; X86-NEXT: movl %esi, %edi +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X86-NEXT: movl %ebx, %esi +; X86-NEXT: movl 32(%ebx), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ecx, %edx +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl (%ebx), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ecx, %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 16(%esi), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ecx, %ebx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 48(%esi), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: orl %ebx, %eax +; X86-NEXT: orl %edi, %eax +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl %cl, %edx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 36(%esi), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ebx, %edx +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 4(%esi), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ebx, %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: shldl %cl, %edi, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 20(%esi), %edi +; X86-NEXT: andl %edi, %ecx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl 8(%ebp), %ebx +; X86-NEXT: movl 52(%ebx), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ebx, %edx +; X86-NEXT: orl %esi, %edx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: notl %ebx +; X86-NEXT: andl %edi, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: notl %esi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: notl %edi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: notl %edi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: notl %edi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: notl %ecx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %edx, 60(%eax) +; X86-NEXT: movl %esi, 56(%eax) +; X86-NEXT: movl %ecx, 52(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 44(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 40(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 36(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 32(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 28(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 24(%eax) +; X86-NEXT: movl %ebx, 20(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 16(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 8(%eax) +; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 48(%eax) +; X86-NEXT: sete %al +; 
X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; -; X64-LABEL: reset_eq_i512: -; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shrl $3, %ecx -; X64-NEXT: andl $60, %ecx -; X64-NEXT: movl (%rdi,%rcx), %edx -; X64-NEXT: btl %esi, %edx -; X64-NEXT: setae %al -; X64-NEXT: btrl %esi, %edx -; X64-NEXT: movl %edx, (%rdi,%rcx) -; X64-NEXT: retq +; SSE-LABEL: reset_eq_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: subq $56, %rsp +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movq $0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movq $1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movl %esi, %ecx +; SSE-NEXT: andl $63, %ecx +; SSE-NEXT: shrl $3, %esi +; SSE-NEXT: andl $56, %esi +; SSE-NEXT: negl %esi +; SSE-NEXT: movslq %esi, %rdx +; SSE-NEXT: movq (%rsp,%rdx), %r9 +; SSE-NEXT: movq 8(%rsp,%rdx), %r8 +; SSE-NEXT: movq %r8, %rsi +; SSE-NEXT: shldq %cl, %r9, %rsi +; SSE-NEXT: movq -8(%rsp,%rdx), %rax +; SSE-NEXT: shldq %cl, %rax, %r9 +; SSE-NEXT: movq 16(%rsp,%rdx), %r14 +; SSE-NEXT: movq 24(%rsp,%rdx), %r10 +; SSE-NEXT: movq %r10, %rbx +; SSE-NEXT: shldq %cl, %r14, %rbx +; SSE-NEXT: shldq %cl, %r8, %r14 +; SSE-NEXT: movq 32(%rsp,%rdx), %r13 +; SSE-NEXT: movq 40(%rsp,%rdx), %r12 +; SSE-NEXT: shldq %cl, %r13, %r12 +; SSE-NEXT: shldq %cl, %r10, %r13 +; SSE-NEXT: movq -16(%rsp,%rdx), %rdx +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; SSE-NEXT: shlq %cl, %rdx +; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq %r12, %rbp +; SSE-NEXT: movq %r9, %r15 +; SSE-NEXT: movq %rsi, %r11 +; SSE-NEXT: movq 16(%rdi), %r8 +; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 48(%rdi), %rcx +; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %rcx, %r13 +; SSE-NEXT: andq %r8, %r9 +; SSE-NEXT: orq %r13, %r9 +; SSE-NEXT: movq 56(%rdi), %rcx +; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %rcx, %r12 +; SSE-NEXT: movq 24(%rdi), %r10 +; SSE-NEXT: andq %r10, %rsi +; SSE-NEXT: orq %r12, %rsi +; SSE-NEXT: movq %r14, %r13 +; SSE-NEXT: movq 32(%rdi), %rcx +; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %rcx, %r14 +; SSE-NEXT: movq %rdx, %r12 +; SSE-NEXT: movq (%rdi), %rcx +; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %rcx, %rdx +; SSE-NEXT: orq %r14, %rdx +; SSE-NEXT: orq %r9, %rdx +; SSE-NEXT: movq %rbx, %r14 +; SSE-NEXT: movq 40(%rdi), %rcx +; SSE-NEXT: andq %rcx, %rbx +; SSE-NEXT: movq %rax, %r9 +; SSE-NEXT: movq 8(%rdi), %r8 +; SSE-NEXT: andq %r8, %rax +; SSE-NEXT: orq %rbx, %rax +; SSE-NEXT: orq %rsi, %rax +; SSE-NEXT: notq %r11 +; SSE-NEXT: andq %r10, %r11 +; SSE-NEXT: notq %r15 +; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; SSE-NEXT: notq %r14 +; SSE-NEXT: andq %rcx, %r14 +; SSE-NEXT: notq %r13 +; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; SSE-NEXT: notq %rbp +; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload 
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; SSE-NEXT: notq %rcx +; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; SSE-NEXT: notq %r9 +; SSE-NEXT: andq %r8, %r9 +; SSE-NEXT: notq %r12 +; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload +; SSE-NEXT: orq %rdx, %rax +; SSE-NEXT: movq %rcx, 48(%rdi) +; SSE-NEXT: movq %rbp, 56(%rdi) +; SSE-NEXT: movq %r13, 32(%rdi) +; SSE-NEXT: movq %r14, 40(%rdi) +; SSE-NEXT: movq %r15, 16(%rdi) +; SSE-NEXT: movq %r11, 24(%rdi) +; SSE-NEXT: movq %r12, (%rdi) +; SSE-NEXT: movq %r9, 8(%rdi) +; SSE-NEXT: sete %al +; SSE-NEXT: addq $56, %rsp +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: reset_eq_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: pushq %rax +; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0] +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movl %esi, %ecx +; AVX2-NEXT: andl $63, %ecx +; AVX2-NEXT: shrl $3, %esi +; AVX2-NEXT: andl $56, %esi +; AVX2-NEXT: negl %esi +; AVX2-NEXT: movslq %esi, %rdx +; AVX2-NEXT: movq -48(%rsp,%rdx), %r8 +; AVX2-NEXT: movq -40(%rsp,%rdx), %rbx +; AVX2-NEXT: movq %rbx, %rax +; AVX2-NEXT: shldq %cl, %r8, %rax +; AVX2-NEXT: movq -16(%rsp,%rdx), %r10 +; AVX2-NEXT: movq -8(%rsp,%rdx), %rsi +; AVX2-NEXT: shldq %cl, %r10, %rsi +; AVX2-NEXT: movq -32(%rsp,%rdx), %r11 +; AVX2-NEXT: movq -24(%rsp,%rdx), %r14 +; AVX2-NEXT: movq %r14, %r9 +; AVX2-NEXT: shldq %cl, %r11, %r9 +; AVX2-NEXT: movq -64(%rsp,%rdx), %r15 +; AVX2-NEXT: movq -56(%rsp,%rdx), %rdx +; AVX2-NEXT: shldq %cl, %rdx, %r8 +; AVX2-NEXT: shldq %cl, %r14, %r10 +; AVX2-NEXT: shldq %cl, %rbx, %r11 +; AVX2-NEXT: shldq %cl, %r15, %rdx +; AVX2-NEXT: shlxq %rcx, %r15, %rcx +; AVX2-NEXT: movq 24(%rdi), %rbx +; AVX2-NEXT: movq 56(%rdi), %r14 +; AVX2-NEXT: movq 16(%rdi), %r15 +; AVX2-NEXT: movq 48(%rdi), %r13 +; AVX2-NEXT: movq 32(%rdi), %rbp +; AVX2-NEXT: andnq %rbp, %r11, %r12 +; AVX2-NEXT: andq %r11, %rbp +; AVX2-NEXT: andnq %r13, %r10, %r11 +; AVX2-NEXT: andq %r10, %r13 +; AVX2-NEXT: andnq %r15, %r8, %r10 +; AVX2-NEXT: andq %r8, %r15 +; AVX2-NEXT: movq 40(%rdi), %r8 +; AVX2-NEXT: orq %r13, %r15 +; AVX2-NEXT: andnq %r8, %r9, %r13 +; AVX2-NEXT: andq %r9, %r8 +; AVX2-NEXT: andnq %r14, %rsi, %r9 +; AVX2-NEXT: andq %rsi, %r14 +; AVX2-NEXT: andnq %rbx, %rax, %rsi +; AVX2-NEXT: andq %rax, %rbx +; AVX2-NEXT: movq (%rdi), %rax +; AVX2-NEXT: orq %r14, %rbx +; AVX2-NEXT: andnq %rax, %rcx, %r14 +; AVX2-NEXT: andq %rcx, %rax +; AVX2-NEXT: orq %rbp, %rax +; AVX2-NEXT: movq 8(%rdi), %rcx +; AVX2-NEXT: orq %r15, %rax +; AVX2-NEXT: andnq %rcx, %rdx, %r15 +; AVX2-NEXT: andq %rdx, %rcx +; AVX2-NEXT: orq %r8, %rcx +; AVX2-NEXT: orq %rbx, %rcx +; AVX2-NEXT: orq %rax, %rcx +; AVX2-NEXT: movq %r11, 48(%rdi) +; AVX2-NEXT: movq %r9, 56(%rdi) +; AVX2-NEXT: movq %r12, 32(%rdi) +; AVX2-NEXT: movq %r13, 40(%rdi) +; AVX2-NEXT: movq %r10, 16(%rdi) +; AVX2-NEXT: movq %rsi, 24(%rdi) +; AVX2-NEXT: movq %r14, (%rdi) +; AVX2-NEXT: movq %r15, 8(%rdi) +; AVX2-NEXT: sete %al +; AVX2-NEXT: addq $8, %rsp +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; 
AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: reset_eq_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: pushq %rax +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0] +; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: movl %esi, %ecx +; AVX512-NEXT: andl $63, %ecx +; AVX512-NEXT: shrl $3, %esi +; AVX512-NEXT: andl $56, %esi +; AVX512-NEXT: negl %esi +; AVX512-NEXT: movslq %esi, %rbx +; AVX512-NEXT: movq -48(%rsp,%rbx), %r8 +; AVX512-NEXT: movq -40(%rsp,%rbx), %r14 +; AVX512-NEXT: movq %r14, %rax +; AVX512-NEXT: shldq %cl, %r8, %rax +; AVX512-NEXT: movq -16(%rsp,%rbx), %r10 +; AVX512-NEXT: movq -8(%rsp,%rbx), %rsi +; AVX512-NEXT: shldq %cl, %r10, %rsi +; AVX512-NEXT: movq -32(%rsp,%rbx), %r11 +; AVX512-NEXT: movq -24(%rsp,%rbx), %r15 +; AVX512-NEXT: movq %r15, %r9 +; AVX512-NEXT: shldq %cl, %r11, %r9 +; AVX512-NEXT: movq -56(%rsp,%rbx), %rdx +; AVX512-NEXT: shldq %cl, %rdx, %r8 +; AVX512-NEXT: shldq %cl, %r15, %r10 +; AVX512-NEXT: shldq %cl, %r14, %r11 +; AVX512-NEXT: movq -64(%rsp,%rbx), %rbx +; AVX512-NEXT: shldq %cl, %rbx, %rdx +; AVX512-NEXT: shlxq %rcx, %rbx, %rcx +; AVX512-NEXT: movq 24(%rdi), %rbx +; AVX512-NEXT: movq 56(%rdi), %r14 +; AVX512-NEXT: movq 16(%rdi), %r15 +; AVX512-NEXT: movq 48(%rdi), %r13 +; AVX512-NEXT: movq 32(%rdi), %rbp +; AVX512-NEXT: andnq %rbp, %r11, %r12 +; AVX512-NEXT: andq %r11, %rbp +; AVX512-NEXT: andnq %r13, %r10, %r11 +; AVX512-NEXT: andq %r10, %r13 +; AVX512-NEXT: andnq %r15, %r8, %r10 +; AVX512-NEXT: andq %r8, %r15 +; AVX512-NEXT: movq 40(%rdi), %r8 +; AVX512-NEXT: orq %r13, %r15 +; AVX512-NEXT: andnq %r8, %r9, %r13 +; AVX512-NEXT: andq %r9, %r8 +; AVX512-NEXT: andnq %r14, %rsi, %r9 +; AVX512-NEXT: andq %rsi, %r14 +; AVX512-NEXT: andnq %rbx, %rax, %rsi +; AVX512-NEXT: andq %rax, %rbx +; AVX512-NEXT: movq (%rdi), %rax +; AVX512-NEXT: orq %r14, %rbx +; AVX512-NEXT: andnq %rax, %rcx, %r14 +; AVX512-NEXT: andq %rcx, %rax +; AVX512-NEXT: orq %rbp, %rax +; AVX512-NEXT: movq 8(%rdi), %rcx +; AVX512-NEXT: orq %r15, %rax +; AVX512-NEXT: andnq %rcx, %rdx, %r15 +; AVX512-NEXT: andq %rdx, %rcx +; AVX512-NEXT: orq %r8, %rcx +; AVX512-NEXT: orq %rbx, %rcx +; AVX512-NEXT: orq %rax, %rcx +; AVX512-NEXT: movq %r11, 48(%rdi) +; AVX512-NEXT: movq %r9, 56(%rdi) +; AVX512-NEXT: movq %r12, 32(%rdi) +; AVX512-NEXT: movq %r13, 40(%rdi) +; AVX512-NEXT: movq %r10, 16(%rdi) +; AVX512-NEXT: movq %rsi, 24(%rdi) +; AVX512-NEXT: movq %r14, (%rdi) +; AVX512-NEXT: movq %r15, 8(%rdi) +; AVX512-NEXT: sete %al +; AVX512-NEXT: addq $8, %rsp +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %rem = and i32 %position, 511 %ofs = zext nneg i32 %rem to i512 %bit = shl nuw i512 1, %ofs @@ -929,33 +2797,572 @@ define i1 @reset_eq_i512(ptr %word, i32 %position) nounwind { define i1 @set_ne_i512(ptr %word, i32 %position) nounwind { ; X86-LABEL: set_ne_i512: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: shrl $3, %esi -; X86-NEXT: andl $60, %esi -; X86-NEXT: movl (%ecx,%esi), %edi -; X86-NEXT: btl %edx, %edi -; X86-NEXT: setb %al -; X86-NEXT: btsl %edx, %edi -; X86-NEXT: movl %edi, (%ecx,%esi) +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $272, %esp # imm = 0x110 +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrl $3, %eax +; X86-NEXT: andl $60, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: leal {{[0-9]+}}(%esp), %edx +; X86-NEXT: subl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl 24(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 28(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl $31, %ecx +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 56(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 60(%edx), %esi +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 12(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 40(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 16(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 20(%edx), %ebx +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 32(%edx), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 36(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: movl %esi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl 52(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: shldl %cl, %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 4(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%ebp), %edx +; X86-NEXT: movl 40(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %eax +; X86-NEXT: movl 8(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ebx, %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl 56(%edx), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edi, %ebx +; X86-NEXT: movl 24(%edx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: orl %esi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%eax), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl 12(%eax), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: orl %esi, %ebx +; X86-NEXT: movl 60(%eax), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl %edx, %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl 28(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: negl %eax +; X86-NEXT: movl 240(%esp,%eax), %esi +; X86-NEXT: shldl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %edx +; 
X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl 32(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edi, %eax +; X86-NEXT: movl (%esi), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edx, %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl 16(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ebx, %eax +; X86-NEXT: movl 48(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl (%esp), %edx # 4-byte Folded Reload +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl 36(%esi), %ebx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl 4(%esi), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl 20(%esi), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl %esi, %edi +; X86-NEXT: movl 52(%eax), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: orl %edi, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: orl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl %ecx, (%esp) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded 
Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl 8(%ebp), %edx +; X86-NEXT: movl %ebx, 60(%edx) +; X86-NEXT: movl %edi, 56(%edx) +; X86-NEXT: movl %ecx, 52(%edx) +; X86-NEXT: movl %esi, 44(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 40(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 36(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 32(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 28(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 24(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 20(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 16(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 12(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 8(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 4(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, (%edx) +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 48(%edx) +; X86-NEXT: setne %al +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; -; X64-LABEL: set_ne_i512: -; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shrl $3, %ecx -; X64-NEXT: andl $60, %ecx -; X64-NEXT: movl (%rdi,%rcx), %edx -; X64-NEXT: btl %esi, %edx -; X64-NEXT: setb %al -; X64-NEXT: btsl %esi, %edx -; X64-NEXT: movl %edx, (%rdi,%rcx) -; X64-NEXT: retq +; SSE-LABEL: set_ne_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: subq $56, %rsp +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movq $0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movq $1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movl %esi, %ecx +; SSE-NEXT: andl $63, %ecx +; SSE-NEXT: shrl $3, %esi +; SSE-NEXT: andl $56, %esi +; SSE-NEXT: negl %esi +; SSE-NEXT: movslq %esi, %rbx +; SSE-NEXT: movq (%rsp,%rbx), %rsi +; SSE-NEXT: movq 8(%rsp,%rbx), %r14 +; SSE-NEXT: movq %r14, %rax +; SSE-NEXT: shldq %cl, %rsi, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 32(%rsp,%rbx), %r8 +; SSE-NEXT: movq 40(%rsp,%rbx), %rbp +; SSE-NEXT: shldq %cl, %r8, %rbp +; SSE-NEXT: movq 16(%rsp,%rbx), %r9 +; SSE-NEXT: movq 24(%rsp,%rbx), %r15 +; SSE-NEXT: movq %r15, %r10 +; SSE-NEXT: shldq %cl, %r9, %r10 +; SSE-NEXT: movq -8(%rsp,%rbx), %r11 +; SSE-NEXT: shldq %cl, %r11, %rsi +; SSE-NEXT: shldq %cl, %r15, %r8 +; SSE-NEXT: shldq %cl, %r14, %r9 +; SSE-NEXT: movq -16(%rsp,%rbx), %rbx +; SSE-NEXT: shldq %cl, %rbx, %r11 
+; SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; SSE-NEXT: shlq %cl, %rbx +; SSE-NEXT: movq 24(%rdi), %r15 +; SSE-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 56(%rdi), %rcx +; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 16(%rdi), %r12 +; SSE-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 48(%rdi), %r13 +; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %r8, %r13 +; SSE-NEXT: andq %rsi, %r12 +; SSE-NEXT: orq %r13, %r12 +; SSE-NEXT: movq %rcx, %r13 +; SSE-NEXT: andq %rbp, %r13 +; SSE-NEXT: andq %rax, %r15 +; SSE-NEXT: orq %r13, %r15 +; SSE-NEXT: movq 32(%rdi), %r14 +; SSE-NEXT: movq %r14, %rcx +; SSE-NEXT: andq %r9, %rcx +; SSE-NEXT: movq (%rdi), %r13 +; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %rbx, %r13 +; SSE-NEXT: orq %rcx, %r13 +; SSE-NEXT: orq %r12, %r13 +; SSE-NEXT: movq 40(%rdi), %rcx +; SSE-NEXT: movq %rcx, %r12 +; SSE-NEXT: andq %r10, %r12 +; SSE-NEXT: movq 8(%rdi), %rdx +; SSE-NEXT: movq %rdx, %rax +; SSE-NEXT: andq %r11, %rax +; SSE-NEXT: orq %r12, %rax +; SSE-NEXT: orq %r15, %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; SSE-NEXT: orq %rcx, %r10 +; SSE-NEXT: orq %r14, %r9 +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; SSE-NEXT: orq %rdx, %r11 +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; SSE-NEXT: orq %r13, %rax +; SSE-NEXT: movq %r8, 48(%rdi) +; SSE-NEXT: movq %rbp, 56(%rdi) +; SSE-NEXT: movq %r9, 32(%rdi) +; SSE-NEXT: movq %r10, 40(%rdi) +; SSE-NEXT: movq %rsi, 16(%rdi) +; SSE-NEXT: movq %r15, 24(%rdi) +; SSE-NEXT: movq %rbx, (%rdi) +; SSE-NEXT: movq %r11, 8(%rdi) +; SSE-NEXT: setne %al +; SSE-NEXT: addq $56, %rsp +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: set_ne_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: subq $72, %rsp +; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0] +; AVX2-NEXT: vmovups %ymm0, (%rsp) +; AVX2-NEXT: movl %esi, %ecx +; AVX2-NEXT: andl $63, %ecx +; AVX2-NEXT: shrl $3, %esi +; AVX2-NEXT: andl $56, %esi +; AVX2-NEXT: negl %esi +; AVX2-NEXT: movslq %esi, %rbx +; AVX2-NEXT: movq 16(%rsp,%rbx), %rsi +; AVX2-NEXT: movq 24(%rsp,%rbx), %rbp +; AVX2-NEXT: movq %rbp, %rax +; AVX2-NEXT: shldq %cl, %rsi, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 48(%rsp,%rbx), %r8 +; AVX2-NEXT: movq 56(%rsp,%rbx), %r13 +; AVX2-NEXT: shldq %cl, %r8, %r13 +; AVX2-NEXT: movq 32(%rsp,%rbx), %r9 +; AVX2-NEXT: movq 40(%rsp,%rbx), %r14 +; AVX2-NEXT: movq %r14, %r10 +; AVX2-NEXT: shldq %cl, %r9, %r10 +; AVX2-NEXT: movq 8(%rsp,%rbx), %r11 +; AVX2-NEXT: shldq %cl, %r11, %rsi +; AVX2-NEXT: shldq %cl, %r14, %r8 +; AVX2-NEXT: movq 16(%rdi), %r12 +; AVX2-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 48(%rdi), %r14 +; AVX2-NEXT: 
movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: andq %r8, %r14 +; AVX2-NEXT: andq %rsi, %r12 +; AVX2-NEXT: orq %r14, %r12 +; AVX2-NEXT: movq 56(%rdi), %r15 +; AVX2-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: andq %r13, %r15 +; AVX2-NEXT: movq 24(%rdi), %r14 +; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: andq %rax, %r14 +; AVX2-NEXT: orq %r15, %r14 +; AVX2-NEXT: shldq %cl, %rbp, %r9 +; AVX2-NEXT: movq (%rsp,%rbx), %rdx +; AVX2-NEXT: movq 32(%rdi), %r15 +; AVX2-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: andq %r9, %r15 +; AVX2-NEXT: shlxq %rcx, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq (%rdi), %rbx +; AVX2-NEXT: movq %rbx, %rbp +; AVX2-NEXT: andq %rax, %rbp +; AVX2-NEXT: orq %r15, %rbp +; AVX2-NEXT: orq %r12, %rbp +; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx +; AVX2-NEXT: shldq %cl, %rdx, %r11 +; AVX2-NEXT: movq 40(%rdi), %rax +; AVX2-NEXT: movq %rax, %rcx +; AVX2-NEXT: andq %r10, %rcx +; AVX2-NEXT: movq 8(%rdi), %r15 +; AVX2-NEXT: movq %r15, %r12 +; AVX2-NEXT: andq %r11, %r12 +; AVX2-NEXT: orq %rcx, %r12 +; AVX2-NEXT: orq %r14, %r12 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; AVX2-NEXT: orq %rax, %r10 +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; AVX2-NEXT: orq %r15, %r11 +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; AVX2-NEXT: orq %rbp, %r12 +; AVX2-NEXT: movq %r8, 48(%rdi) +; AVX2-NEXT: movq %r13, 56(%rdi) +; AVX2-NEXT: movq %r9, 32(%rdi) +; AVX2-NEXT: movq %r10, 40(%rdi) +; AVX2-NEXT: movq %rsi, 16(%rdi) +; AVX2-NEXT: movq %rcx, 24(%rdi) +; AVX2-NEXT: movq %rbx, (%rdi) +; AVX2-NEXT: movq %r11, 8(%rdi) +; AVX2-NEXT: setne %al +; AVX2-NEXT: addq $72, %rsp +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: set_ne_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: subq $72, %rsp +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0] +; AVX512-NEXT: vmovups %ymm0, (%rsp) +; AVX512-NEXT: movl %esi, %ecx +; AVX512-NEXT: andl $63, %ecx +; AVX512-NEXT: shrl $3, %esi +; AVX512-NEXT: andl $56, %esi +; AVX512-NEXT: negl %esi +; AVX512-NEXT: movslq %esi, %rbx +; AVX512-NEXT: movq 16(%rsp,%rbx), %rsi +; AVX512-NEXT: movq 24(%rsp,%rbx), %rbp +; AVX512-NEXT: movq %rbp, %rax +; AVX512-NEXT: shldq %cl, %rsi, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 48(%rsp,%rbx), %r8 +; AVX512-NEXT: movq 56(%rsp,%rbx), %r13 +; AVX512-NEXT: shldq %cl, %r8, %r13 +; AVX512-NEXT: movq 32(%rsp,%rbx), %r9 +; AVX512-NEXT: movq 40(%rsp,%rbx), %r14 +; AVX512-NEXT: movq %r14, %r10 +; AVX512-NEXT: shldq %cl, %r9, %r10 +; AVX512-NEXT: movq 8(%rsp,%rbx), %r11 +; AVX512-NEXT: 
shldq %cl, %r11, %rsi +; AVX512-NEXT: shldq %cl, %r14, %r8 +; AVX512-NEXT: movq 16(%rdi), %r12 +; AVX512-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 48(%rdi), %r14 +; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: andq %r8, %r14 +; AVX512-NEXT: andq %rsi, %r12 +; AVX512-NEXT: orq %r14, %r12 +; AVX512-NEXT: movq 56(%rdi), %r15 +; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: andq %r13, %r15 +; AVX512-NEXT: movq 24(%rdi), %r14 +; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: andq %rax, %r14 +; AVX512-NEXT: orq %r15, %r14 +; AVX512-NEXT: shldq %cl, %rbp, %r9 +; AVX512-NEXT: movq (%rsp,%rbx), %rdx +; AVX512-NEXT: movq 32(%rdi), %r15 +; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: andq %r9, %r15 +; AVX512-NEXT: shlxq %rcx, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq (%rdi), %rbx +; AVX512-NEXT: movq %rbx, %rbp +; AVX512-NEXT: andq %rax, %rbp +; AVX512-NEXT: orq %r15, %rbp +; AVX512-NEXT: orq %r12, %rbp +; AVX512-NEXT: # kill: def $cl killed $cl killed $rcx +; AVX512-NEXT: shldq %cl, %rdx, %r11 +; AVX512-NEXT: movq 40(%rdi), %rax +; AVX512-NEXT: movq %rax, %rcx +; AVX512-NEXT: andq %r10, %rcx +; AVX512-NEXT: movq 8(%rdi), %r15 +; AVX512-NEXT: movq %r15, %r12 +; AVX512-NEXT: andq %r11, %r12 +; AVX512-NEXT: orq %rcx, %r12 +; AVX512-NEXT: orq %r14, %r12 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; AVX512-NEXT: orq %rax, %r10 +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; AVX512-NEXT: orq %r15, %r11 +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; AVX512-NEXT: orq %rbp, %r12 +; AVX512-NEXT: movq %r8, 48(%rdi) +; AVX512-NEXT: movq %r13, 56(%rdi) +; AVX512-NEXT: movq %r9, 32(%rdi) +; AVX512-NEXT: movq %r10, 40(%rdi) +; AVX512-NEXT: movq %rsi, 16(%rdi) +; AVX512-NEXT: movq %rcx, 24(%rdi) +; AVX512-NEXT: movq %rbx, (%rdi) +; AVX512-NEXT: movq %r11, 8(%rdi) +; AVX512-NEXT: setne %al +; AVX512-NEXT: addq $72, %rsp +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %rem = and i32 %position, 511 %ofs = zext nneg i32 %rem to i512 %bit = shl nuw i512 1, %ofs @@ -976,14 +3383,13 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $352, %esp # imm = 0x160 +; X86-NEXT: subl $432, %esp # imm = 0x1B0 ; X86-NEXT: movl 12(%ebp), %ecx ; X86-NEXT: movl %ecx, %edx ; X86-NEXT: shrl $3, %edx ; X86-NEXT: andl $60, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: subl %edx, %eax +; X86-NEXT: leal {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl %edx, %esi ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -1016,58 +3422,60 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind { ; X86-NEXT: movl $0, 
{{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl 56(%eax), %esi -; X86-NEXT: movl 60(%eax), %ebx -; X86-NEXT: movl 52(%eax), %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 48(%eax), %edi -; X86-NEXT: movl 44(%eax), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 40(%eax), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 36(%eax), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 32(%eax), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 28(%eax), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 24(%eax), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 20(%eax), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 16(%eax), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 12(%eax), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 8(%eax), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 4(%eax), %eax +; X86-NEXT: movl 56(%esi), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movzbl 16(%ebp), %eax -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: andl $31, %ecx -; X86-NEXT: shldl %cl, %esi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: shldl %cl, %eax, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shldl %cl, %edi, %eax +; X86-NEXT: movl 60(%esi), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: shldl %cl, %ebx, %edi +; X86-NEXT: movl 52(%esi), %eax +; X86-NEXT: movl 48(%esi), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 40(%esi), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%esi), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 36(%esi), %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 32(%esi), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 28(%esi), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 24(%esi), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 20(%esi), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 16(%esi), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 12(%esi), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%esi), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl (%esi), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 4(%esi), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl 16(%ebp), %ebx +; X86-NEXT: movzbl %bl, %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %esi +; 
X86-NEXT: subl %edx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl $31, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: shldl %cl, %edx, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: shldl %cl, %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl %cl, %ebx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: shldl %cl, %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl %cl, %edx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: shldl %cl, %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -1092,12 +3500,9 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: shldl %cl, %eax, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shll %cl, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: movl %ebx, %edx +; X86-NEXT: shldl %cl, %edi, %edx ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -1129,148 +3534,273 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind { ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl 56(%eax), %esi -; X86-NEXT: movl 60(%eax), %edi -; X86-NEXT: shldl %cl, %esi, %edi -; X86-NEXT: movl 8(%ebp), %edx -; X86-NEXT: andl 60(%edx), %ebx -; X86-NEXT: orl %edi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: movl 52(%eax), %edi -; X86-NEXT: shldl %cl, %edi, %esi -; X86-NEXT: andl 56(%edx), %ebx -; X86-NEXT: orl %esi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: movl 48(%eax), %esi -; X86-NEXT: shldl %cl, %esi, %edi -; X86-NEXT: andl 52(%edx), %ebx -; X86-NEXT: orl %edi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: movl 44(%eax), %edi -; X86-NEXT: shldl %cl, %edi, %esi -; X86-NEXT: andl 48(%edx), %ebx -; X86-NEXT: orl %esi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: movl 40(%eax), %esi -; X86-NEXT: shldl %cl, %esi, %edi -; X86-NEXT: andl 44(%edx), %ebx -; X86-NEXT: orl %edi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: movl 36(%eax), %edi -; X86-NEXT: shldl %cl, %edi, %esi -; X86-NEXT: andl 40(%edx), %ebx -; X86-NEXT: orl %esi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: movl 32(%eax), %esi -; X86-NEXT: shldl %cl, %esi, %edi -; X86-NEXT: andl 36(%edx), %ebx -; X86-NEXT: orl %edi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: movl 28(%eax), %edi -; X86-NEXT: shldl %cl, %edi, %esi -; X86-NEXT: andl 32(%edx), %ebx -; X86-NEXT: orl %esi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: movl 24(%eax), %esi -; X86-NEXT: shldl %cl, %esi, %edi -; X86-NEXT: andl 28(%edx), %ebx -; X86-NEXT: orl %edi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: movl 20(%eax), %edi -; X86-NEXT: shldl %cl, %edi, %esi -; X86-NEXT: andl 24(%edx), %ebx -; X86-NEXT: orl %esi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: movl 16(%eax), %esi -; X86-NEXT: shldl %cl, %esi, %edi -; X86-NEXT: andl 20(%edx), %ebx -; X86-NEXT: orl %edi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: movl 12(%eax), %edi -; X86-NEXT: shldl %cl, %edi, %esi -; X86-NEXT: andl 16(%edx), %ebx -; X86-NEXT: orl %esi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: movl 8(%eax), %esi -; X86-NEXT: shldl %cl, %esi, %edi -; X86-NEXT: andl 12(%edx), %ebx -; X86-NEXT: orl %edi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: notl %ebx -; X86-NEXT: movl 4(%eax), %edi -; X86-NEXT: shldl %cl, %edi, %esi -; X86-NEXT: andl 8(%edx), %ebx -; X86-NEXT: orl %esi, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: notl %esi -; X86-NEXT: movl (%eax), %eax -; X86-NEXT: shldl %cl, %eax, %edi -; X86-NEXT: andl 4(%edx), %esi -; X86-NEXT: orl %edi, %esi -; X86-NEXT: movl %esi, %edi +; X86-NEXT: movl 8(%ebp), %ebx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 48(%ebx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: notl %esi -; X86-NEXT: shll %cl, %eax -; X86-NEXT: andl (%edx), %esi -; X86-NEXT: orl %eax, %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl (%edx,%eax), %eax -; X86-NEXT: btl %ecx, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 
4-byte Reload -; X86-NEXT: movl %eax, 60(%edx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 56(%edx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 52(%edx) +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 16(%ebx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %eax, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 40(%ebx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %eax, %edx +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 48(%edx) +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%ebx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edx, %eax +; X86-NEXT: orl %esi, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 56(%ebx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %edx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 24(%ebx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %edx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 44(%edx) +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 52(%ebx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edx, %eax +; X86-NEXT: movl %eax, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 40(%edx) +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 20(%ebx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %eax +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 36(%edx) +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%ebx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 12(%ebx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 60(%ebx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 28(%ebx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %eax, 
%edx +; X86-NEXT: orl %esi, %edx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 32(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 32(%ebx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %ecx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl (%ebx), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edi, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 28(%edx) +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 36(%ebx), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 4(%ebx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edx, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl 56(%edi), %ebx +; X86-NEXT: movl 60(%edi), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: shldl %cl, %ebx, %eax +; X86-NEXT: orl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl 52(%edi), %eax +; X86-NEXT: shldl %cl, %eax, %ebx +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl 48(%edi), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %esi, %eax +; X86-NEXT: orl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 24(%edx) +; X86-NEXT: notl %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, %edx +; X86-NEXT: movl 40(%edi), %ebx +; X86-NEXT: movl 44(%edi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %ebx, %eax +; X86-NEXT: orl %eax, %edx +; X86-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl 36(%edi), %eax +; X86-NEXT: shldl %cl, %eax, %ebx +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl 32(%edi), %ebx +; X86-NEXT: shldl %cl, %ebx, %eax +; X86-NEXT: orl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl 28(%edi), %eax +; X86-NEXT: shldl %cl, %eax, %ebx +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl 24(%edi), %ebx +; X86-NEXT: shldl %cl, %ebx, %eax +; X86-NEXT: orl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl 20(%edi), %eax +; X86-NEXT: shldl %cl, %eax, %ebx +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl 16(%edi), %ebx +; X86-NEXT: shldl %cl, %ebx, %eax +; X86-NEXT: orl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl 12(%edi), %eax +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shldl %cl, %eax, %ebx +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: notl %esi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl 8(%edi), %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: orl %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 20(%edx) +; X86-NEXT: notl %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl 4(%edi), %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: shldl %cl, %ebx, %edx +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 16(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte 
Reload +; X86-NEXT: shldl %cl, %esi, %eax +; X86-NEXT: orl %eax, %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl (%edi), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %ebx +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: notl %edi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %eax +; X86-NEXT: orl %eax, %edi +; X86-NEXT: movl %edi, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 12(%edx) -; X86-NEXT: movl %ebx, 8(%edx) -; X86-NEXT: movl %edi, 4(%edx) -; X86-NEXT: movl %esi, (%edx) -; X86-NEXT: setae %al +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 60(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 56(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 52(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 44(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 40(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 36(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 32(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 28(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 24(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 20(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 16(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 8(%eax) +; X86-NEXT: movl %edx, 4(%eax) +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: movl %esi, 48(%eax) +; X86-NEXT: sete %al ; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -1286,8 +3816,7 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind { ; SSE-NEXT: pushq %r13 ; SSE-NEXT: pushq %r12 ; SSE-NEXT: pushq %rbx -; SSE-NEXT: subq $184, %rsp -; SSE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: subq $216, %rsp ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) ; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) @@ -1300,103 +3829,139 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind { ; SSE-NEXT: movq $1, {{[0-9]+}}(%rsp) ; SSE-NEXT: movl %esi, %ecx ; SSE-NEXT: andl $63, %ecx -; SSE-NEXT: movl %esi, %eax -; SSE-NEXT: shrl $3, %eax -; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT: # kill: def $eax killed $eax killed $rax -; SSE-NEXT: andl $56, %eax -; SSE-NEXT: negl %eax -; SSE-NEXT: movslq %eax, %r12 -; SSE-NEXT: movq 160(%rsp,%r12), %rax -; SSE-NEXT: movq 168(%rsp,%r12), %r10 -; SSE-NEXT: shldq %cl, %rax, %r10 -; SSE-NEXT: movq 152(%rsp,%r12), %rsi -; SSE-NEXT: shldq %cl, %rsi, %rax 
-; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT: movq 144(%rsp,%r12), %r11 -; SSE-NEXT: shldq %cl, %r11, %rsi -; SSE-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT: movq 136(%rsp,%r12), %rbx -; SSE-NEXT: shldq %cl, %rbx, %r11 -; SSE-NEXT: movq 128(%rsp,%r12), %r14 -; SSE-NEXT: shldq %cl, %r14, %rbx -; SSE-NEXT: movq 120(%rsp,%r12), %r15 -; SSE-NEXT: shldq %cl, %r15, %r14 -; SSE-NEXT: movq 112(%rsp,%r12), %r13 -; SSE-NEXT: shldq %cl, %r13, %r15 -; SSE-NEXT: shlq %cl, %r13 -; SSE-NEXT: movl %edx, %eax +; SSE-NEXT: shrl $3, %esi +; SSE-NEXT: andl $56, %esi +; SSE-NEXT: negl %esi +; SSE-NEXT: movslq %esi, %r10 +; SSE-NEXT: movq 184(%rsp,%r10), %r11 +; SSE-NEXT: movq 192(%rsp,%r10), %rsi +; SSE-NEXT: movq %rsi, %r13 +; SSE-NEXT: shldq %cl, %r11, %r13 +; SSE-NEXT: movq 200(%rsp,%r10), %r15 +; SSE-NEXT: shldq %cl, %rsi, %r15 +; SSE-NEXT: movq 168(%rsp,%r10), %rbx +; SSE-NEXT: movq 176(%rsp,%r10), %rsi +; SSE-NEXT: movq %rsi, %r14 +; SSE-NEXT: shldq %cl, %rbx, %r14 +; SSE-NEXT: shldq %cl, %rsi, %r11 +; SSE-NEXT: movq 152(%rsp,%r10), %rax +; SSE-NEXT: movq 160(%rsp,%r10), %r8 +; SSE-NEXT: movq %r8, %r12 +; SSE-NEXT: shldq %cl, %rax, %r12 +; SSE-NEXT: shldq %cl, %r8, %rbx +; SSE-NEXT: movq 144(%rsp,%r10), %r9 +; SSE-NEXT: movq %r9, %r8 +; SSE-NEXT: shlq %cl, %r8 +; SSE-NEXT: shldq %cl, %r9, %rax +; SSE-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movl %edx, %edx ; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) ; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, (%rsp) ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movq %rdx, {{[0-9]+}}(%rsp) ; SSE-NEXT: movq $0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movq 32(%rsp,%r12), %rax -; SSE-NEXT: movq 40(%rsp,%r12), %rdx -; SSE-NEXT: shldq %cl, %rax, %rdx +; SSE-NEXT: movq 16(%rdi), %rdx ; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT: movq 24(%rsp,%r12), %rdx -; SSE-NEXT: shldq %cl, %rdx, %rax -; SSE-NEXT: movq 16(%rsp,%r12), %rsi -; SSE-NEXT: shldq %cl, %rsi, %rdx -; SSE-NEXT: movq 8(%rsp,%r12), %r8 -; SSE-NEXT: shldq %cl, %r8, %rsi -; SSE-NEXT: movq (%rsp,%r12), %rbp -; SSE-NEXT: shldq %cl, %rbp, %r8 -; SSE-NEXT: movq -8(%rsp,%r12), %r9 -; SSE-NEXT: shldq %cl, %r9, %rbp -; SSE-NEXT: notq %r10 -; SSE-NEXT: andq 56(%rdi), %r10 -; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload -; SSE-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; SSE-NEXT: notq %r10 -; SSE-NEXT: andq 48(%rdi), %r10 -; SSE-NEXT: orq %rax, %r10 +; SSE-NEXT: movq 48(%rdi), %rsi +; SSE-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %rsi, %r13 +; SSE-NEXT: andq %rdx, %r12 +; SSE-NEXT: orq %r13, %r12 +; SSE-NEXT: movq %r15, %rsi +; SSE-NEXT: movq 56(%rdi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %rdx, %r15 +; SSE-NEXT: movq %rbx, %r13 +; SSE-NEXT: movq 24(%rdi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %rdx, %rbx +; SSE-NEXT: orq %r15, %rbx +; SSE-NEXT: movq %r14, %rbp +; SSE-NEXT: movq 32(%rdi), %rdx +; SSE-NEXT: movq %rdx, 
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %rdx, %r14 +; SSE-NEXT: movq %r8, %r15 +; SSE-NEXT: movq (%rdi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %rdx, %r8 +; SSE-NEXT: orq %r14, %r8 +; SSE-NEXT: orq %r12, %r8 +; SSE-NEXT: movq %r11, %r12 +; SSE-NEXT: movq 40(%rdi), %r9 +; SSE-NEXT: andq %r9, %r11 +; SSE-NEXT: movq %rax, %r14 +; SSE-NEXT: movq 8(%rdi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq %rdx, %rax +; SSE-NEXT: orq %r11, %rax +; SSE-NEXT: orq %rbx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; SSE-NEXT: notq %rax -; SSE-NEXT: andq 40(%rdi), %rax -; SSE-NEXT: orq %rdx, %rax +; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload ; SSE-NEXT: movq %rax, %rdx +; SSE-NEXT: movq 56(%rsp,%r10), %r11 +; SSE-NEXT: movq 64(%rsp,%r10), %rax +; SSE-NEXT: movq %rax, %rbx +; SSE-NEXT: shldq %cl, %r11, %rbx +; SSE-NEXT: orq %rbx, %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: notq %rsi +; SSE-NEXT: movq 72(%rsp,%r10), %rbx +; SSE-NEXT: shldq %cl, %rax, %rbx +; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; SSE-NEXT: orq %rbx, %rsi +; SSE-NEXT: notq %rbp +; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload +; SSE-NEXT: movq 40(%rsp,%r10), %rax +; SSE-NEXT: movq 48(%rsp,%r10), %rdx +; SSE-NEXT: movq %rdx, %rbx +; SSE-NEXT: shldq %cl, %rax, %rbx +; SSE-NEXT: orq %rbx, %rbp +; SSE-NEXT: notq %r12 +; SSE-NEXT: andq %r9, %r12 +; SSE-NEXT: shldq %cl, %rdx, %r11 +; SSE-NEXT: movq 24(%rsp,%r10), %r9 +; SSE-NEXT: movq 32(%rsp,%r10), %rdx +; SSE-NEXT: movq %rdx, %rbx +; SSE-NEXT: shldq %cl, %r9, %rbx +; SSE-NEXT: orq %r11, %r12 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload ; SSE-NEXT: notq %r11 -; SSE-NEXT: andq 32(%rdi), %r11 -; SSE-NEXT: orq %rsi, %r11 -; SSE-NEXT: notq %rbx -; SSE-NEXT: andq 24(%rdi), %rbx -; SSE-NEXT: orq %r8, %rbx -; SSE-NEXT: notq %r14 -; SSE-NEXT: andq 16(%rdi), %r14 -; SSE-NEXT: orq %rbp, %r14 -; SSE-NEXT: notq %r15 -; SSE-NEXT: movq -16(%rsp,%r12), %rax -; SSE-NEXT: shldq %cl, %rax, %r9 -; SSE-NEXT: andq 8(%rdi), %r15 -; SSE-NEXT: orq %r9, %r15 +; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: orq %rbx, %r11 ; SSE-NEXT: notq %r13 -; SSE-NEXT: # kill: def $cl killed $cl killed $ecx -; SSE-NEXT: shlq %cl, %rax -; SSE-NEXT: andq (%rdi), %r13 +; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload ; SSE-NEXT: orq %rax, %r13 +; SSE-NEXT: notq %r15 +; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; SSE-NEXT: movq 16(%rsp,%r10), %rax +; SSE-NEXT: movq %rax, %rdx +; SSE-NEXT: shlq %cl, %rdx +; SSE-NEXT: orq %rdx, %r15 +; SSE-NEXT: notq %r14 +; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; SSE-NEXT: shldq %cl, %rax, %r9 +; SSE-NEXT: orq %r9, %r14 +; SSE-NEXT: orq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT: andl $60, %eax -; SSE-NEXT: movl (%rdi,%rax), %eax -; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload -; SSE-NEXT: btl %ecx, %eax -; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT: movq %rax, 56(%rdi) -; SSE-NEXT: movq %r10, 48(%rdi) -; SSE-NEXT: movq %rdx, 40(%rdi) -; 
SSE-NEXT: movq %r11, 32(%rdi) -; SSE-NEXT: movq %rbx, 24(%rdi) -; SSE-NEXT: movq %r14, 16(%rdi) -; SSE-NEXT: movq %r15, 8(%rdi) -; SSE-NEXT: movq %r13, (%rdi) -; SSE-NEXT: setae %al -; SSE-NEXT: addq $184, %rsp +; SSE-NEXT: movq %rax, 48(%rdi) +; SSE-NEXT: movq %rsi, 56(%rdi) +; SSE-NEXT: movq %rbp, 32(%rdi) +; SSE-NEXT: movq %r12, 40(%rdi) +; SSE-NEXT: movq %r11, 16(%rdi) +; SSE-NEXT: movq %r13, 24(%rdi) +; SSE-NEXT: movq %r15, (%rdi) +; SSE-NEXT: movq %r14, 8(%rdi) +; SSE-NEXT: sete %al +; SSE-NEXT: addq $216, %rsp ; SSE-NEXT: popq %rbx ; SSE-NEXT: popq %r12 ; SSE-NEXT: popq %r13 @@ -1413,103 +3978,132 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind { ; AVX2-NEXT: pushq %r13 ; AVX2-NEXT: pushq %r12 ; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: subq $168, %rsp +; AVX2-NEXT: subq $200, %rsp ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) ; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) ; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [1,0,0,0] ; AVX2-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) -; AVX2-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; AVX2-NEXT: movl %esi, %ecx -; AVX2-NEXT: andl $63, %ecx -; AVX2-NEXT: movl %esi, %r11d -; AVX2-NEXT: shrl $3, %r11d -; AVX2-NEXT: movl %r11d, %eax -; AVX2-NEXT: andl $56, %eax -; AVX2-NEXT: negl %eax -; AVX2-NEXT: movslq %eax, %r10 -; AVX2-NEXT: movq 104(%rsp,%r10), %r15 -; AVX2-NEXT: movq 112(%rsp,%r10), %rax -; AVX2-NEXT: movq %rax, %rsi -; AVX2-NEXT: shldq %cl, %r15, %rsi -; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: movq 120(%rsp,%r10), %rsi -; AVX2-NEXT: movq %rsi, %r8 -; AVX2-NEXT: shldq %cl, %rax, %r8 -; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: movq 128(%rsp,%r10), %rax -; AVX2-NEXT: movq %rax, %rbx -; AVX2-NEXT: shldq %cl, %rsi, %rbx -; AVX2-NEXT: movq 136(%rsp,%r10), %rsi -; AVX2-NEXT: movq %rsi, %r14 -; AVX2-NEXT: shldq %cl, %rax, %r14 -; AVX2-NEXT: movq 144(%rsp,%r10), %rax -; AVX2-NEXT: movq %rax, %r12 -; AVX2-NEXT: shldq %cl, %rsi, %r12 -; AVX2-NEXT: movq 96(%rsp,%r10), %rsi -; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: movq 152(%rsp,%r10), %r13 -; AVX2-NEXT: shldq %cl, %rax, %r13 -; AVX2-NEXT: shldq %cl, %rsi, %r15 -; AVX2-NEXT: movl %edx, %eax +; AVX2-NEXT: movl %esi, %r8d +; AVX2-NEXT: andl $63, %r8d +; AVX2-NEXT: shrl $3, %esi +; AVX2-NEXT: andl $56, %esi +; AVX2-NEXT: negl %esi +; AVX2-NEXT: movslq %esi, %rsi +; AVX2-NEXT: movq 144(%rsp,%rsi), %r11 +; AVX2-NEXT: movq 152(%rsp,%rsi), %r12 +; AVX2-NEXT: movq %r12, %r10 +; AVX2-NEXT: movl %r8d, %ecx +; AVX2-NEXT: shldq %cl, %r11, %r10 +; AVX2-NEXT: movq 176(%rsp,%rsi), %r14 +; AVX2-NEXT: movq 184(%rsp,%rsi), %r9 +; AVX2-NEXT: shldq %cl, %r14, %r9 +; AVX2-NEXT: movq 160(%rsp,%rsi), %r15 +; AVX2-NEXT: movq 168(%rsp,%rsi), %r13 +; AVX2-NEXT: movq %r13, %rbx +; AVX2-NEXT: shldq %cl, %r15, %rbx +; AVX2-NEXT: movq 128(%rsp,%rsi), %rbp +; AVX2-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 136(%rsp,%rsi), %rax +; AVX2-NEXT: shldq %cl, %rax, %r11 +; AVX2-NEXT: shldq %cl, %r13, %r14 +; AVX2-NEXT: shldq %cl, %r12, %r15 +; AVX2-NEXT: shldq %cl, %rbp, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movl %edx, %edx ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vmovups %xmm1, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; 
AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movq %rdx, (%rsp) ; AVX2-NEXT: movq $0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: movq 16(%rsp,%r10), %rbp -; AVX2-NEXT: movq 24(%rsp,%r10), %r9 -; AVX2-NEXT: shldq %cl, %rbp, %r9 -; AVX2-NEXT: movq 8(%rsp,%r10), %rdx -; AVX2-NEXT: shldq %cl, %rdx, %rbp -; AVX2-NEXT: movq (%rsp,%r10), %rax -; AVX2-NEXT: shldq %cl, %rax, %rdx -; AVX2-NEXT: movq -8(%rsp,%r10), %r8 -; AVX2-NEXT: shldq %cl, %r8, %rax -; AVX2-NEXT: movq -16(%rsp,%r10), %rsi -; AVX2-NEXT: shldq %cl, %rsi, %r8 -; AVX2-NEXT: andnq 56(%rdi), %r13, %r13 -; AVX2-NEXT: orq %r9, %r13 -; AVX2-NEXT: movq -24(%rsp,%r10), %r9 -; AVX2-NEXT: shldq %cl, %r9, %rsi -; AVX2-NEXT: andnq 48(%rdi), %r12, %r12 -; AVX2-NEXT: andnq 40(%rdi), %r14, %r14 +; AVX2-NEXT: movq 16(%rdi), %r12 +; AVX2-NEXT: movq 48(%rdi), %rbp +; AVX2-NEXT: movq 32(%rdi), %r13 +; AVX2-NEXT: andnq %r13, %r15, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: andq %r15, %r13 +; AVX2-NEXT: andnq %rbp, %r14, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: andq %r14, %rbp +; AVX2-NEXT: andnq %r12, %r11, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: andq %r11, %r12 +; AVX2-NEXT: movq 40(%rdi), %rax ; AVX2-NEXT: orq %rbp, %r12 -; AVX2-NEXT: orq %rdx, %r14 -; AVX2-NEXT: andnq 32(%rdi), %rbx, %rdx -; AVX2-NEXT: orq %rax, %rdx -; AVX2-NEXT: shlxq %rcx, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload -; AVX2-NEXT: movq -32(%rsp,%r10), %r10 -; AVX2-NEXT: shlxq %rcx, %r10, %rbx -; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx -; AVX2-NEXT: shldq %cl, %r10, %r9 +; AVX2-NEXT: andnq %rax, %rbx, %rcx +; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq %rax, %rbp +; AVX2-NEXT: andq %rbx, %rbp +; AVX2-NEXT: movq 56(%rdi), %rcx +; AVX2-NEXT: andnq %rcx, %r9, %rbx +; AVX2-NEXT: andq %r9, %rcx +; AVX2-NEXT: movq 24(%rdi), %rax +; AVX2-NEXT: andnq %rax, %r10, %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: andq %r10, %rax +; AVX2-NEXT: orq %rcx, %rax +; AVX2-NEXT: shlxq %r8, {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX2-NEXT: movq (%rdi), %r10 +; AVX2-NEXT: andnq %r10, %rcx, %r15 +; AVX2-NEXT: andq %rcx, %r10 +; AVX2-NEXT: movq 40(%rsp,%rsi), %rdx +; AVX2-NEXT: movq 48(%rsp,%rsi), %r11 +; AVX2-NEXT: movq %r11, %r9 +; AVX2-NEXT: movl %r8d, %ecx +; AVX2-NEXT: shldq %cl, %rdx, %r9 +; AVX2-NEXT: orq %r13, %r10 +; AVX2-NEXT: orq %r12, %r10 +; AVX2-NEXT: movq 8(%rdi), %r13 ; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; AVX2-NEXT: andnq 24(%rdi), %rcx, %rcx -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX2-NEXT: andnq 16(%rdi), %r10, %r10 -; AVX2-NEXT: orq %r8, %rcx -; AVX2-NEXT: orq %rsi, %r10 -; AVX2-NEXT: andnq 8(%rdi), %r15, %rsi -; AVX2-NEXT: orq %r9, %rsi -; AVX2-NEXT: andnq (%rdi), %rax, %rax -; AVX2-NEXT: orq %rbx, %rax -; AVX2-NEXT: andl $60, %r11d -; AVX2-NEXT: movl (%rdi,%r11), %r8d -; AVX2-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload -; AVX2-NEXT: btl %r9d, %r8d -; AVX2-NEXT: movq %r13, 56(%rdi) -; AVX2-NEXT: movq %r12, 48(%rdi) -; AVX2-NEXT: movq %r14, 40(%rdi) -; AVX2-NEXT: movq %rdx, 32(%rdi) -; AVX2-NEXT: movq %rcx, 24(%rdi) -; AVX2-NEXT: movq %r10, 16(%rdi) -; AVX2-NEXT: movq %rsi, 8(%rdi) -; AVX2-NEXT: movq %rax, (%rdi) -; AVX2-NEXT: setae %al -; AVX2-NEXT: addq $168, %rsp +; AVX2-NEXT: andnq %r13, %rcx, %r12 +; AVX2-NEXT: andq %rcx, %r13 +; 
AVX2-NEXT: orq %rbp, %r13 +; AVX2-NEXT: orq %rax, %r13 +; AVX2-NEXT: movq 56(%rsp,%rsi), %rax +; AVX2-NEXT: movl %r8d, %ecx +; AVX2-NEXT: shldq %cl, %r11, %rax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; AVX2-NEXT: orq %r9, %r14 +; AVX2-NEXT: orq %rax, %rbx +; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 24(%rsp,%rsi), %rax +; AVX2-NEXT: movq 32(%rsp,%rsi), %r9 +; AVX2-NEXT: movq %r9, %r11 +; AVX2-NEXT: shldq %cl, %rax, %r11 +; AVX2-NEXT: shldq %cl, %r9, %rdx +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; AVX2-NEXT: orq %r11, %rbp +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; AVX2-NEXT: orq %rdx, %rbx +; AVX2-NEXT: movq 8(%rsp,%rsi), %rdx +; AVX2-NEXT: movq 16(%rsp,%rsi), %r9 +; AVX2-NEXT: movq %r9, %r11 +; AVX2-NEXT: shldq %cl, %rdx, %r11 +; AVX2-NEXT: shldq %cl, %r9, %rax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; AVX2-NEXT: orq %r11, %r9 +; AVX2-NEXT: movq (%rsp,%rsi), %rsi +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; AVX2-NEXT: orq %rax, %r11 +; AVX2-NEXT: shlxq %r8, %rsi, %rax +; AVX2-NEXT: shldq %cl, %rsi, %rdx +; AVX2-NEXT: orq %rax, %r15 +; AVX2-NEXT: orq %rdx, %r12 +; AVX2-NEXT: orq %r10, %r13 +; AVX2-NEXT: movq %r14, 48(%rdi) +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: movq %rax, 56(%rdi) +; AVX2-NEXT: movq %rbp, 32(%rdi) +; AVX2-NEXT: movq %rbx, 40(%rdi) +; AVX2-NEXT: movq %r9, 16(%rdi) +; AVX2-NEXT: movq %r11, 24(%rdi) +; AVX2-NEXT: movq %r15, (%rdi) +; AVX2-NEXT: movq %r12, 8(%rdi) +; AVX2-NEXT: sete %al +; AVX2-NEXT: addq $200, %rsp ; AVX2-NEXT: popq %rbx ; AVX2-NEXT: popq %r12 ; AVX2-NEXT: popq %r13 @@ -1527,100 +4121,131 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind { ; AVX512-NEXT: pushq %r13 ; AVX512-NEXT: pushq %r12 ; AVX512-NEXT: pushq %rbx -; AVX512-NEXT: subq $152, %rsp +; AVX512-NEXT: subq $184, %rsp ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovaps {{.*#+}} xmm1 = [1,0,0,0] ; AVX512-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; AVX512-NEXT: movl %esi, %ecx ; AVX512-NEXT: andl $63, %ecx -; AVX512-NEXT: movl %esi, %r8d -; AVX512-NEXT: shrl $3, %r8d -; AVX512-NEXT: movl %r8d, %eax -; AVX512-NEXT: andl $56, %eax -; AVX512-NEXT: negl %eax -; AVX512-NEXT: movslq %eax, %r9 -; AVX512-NEXT: movq 88(%rsp,%r9), %r10 -; AVX512-NEXT: movq 96(%rsp,%r9), %rax -; AVX512-NEXT: movq %rax, %rsi -; AVX512-NEXT: shldq %cl, %r10, %rsi -; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT: movq 104(%rsp,%r9), %rsi -; AVX512-NEXT: movq %rsi, %r11 -; AVX512-NEXT: shldq %cl, %rax, %r11 -; AVX512-NEXT: movq 112(%rsp,%r9), %rax -; AVX512-NEXT: movq %rax, %rbx -; AVX512-NEXT: shldq %cl, %rsi, %rbx -; AVX512-NEXT: movq 120(%rsp,%r9), %rsi -; AVX512-NEXT: movq %rsi, %r14 -; AVX512-NEXT: shldq %cl, %rax, %r14 -; AVX512-NEXT: movq 128(%rsp,%r9), %rax -; AVX512-NEXT: movq %rax, %r12 -; AVX512-NEXT: shldq %cl, %rsi, %r12 -; AVX512-NEXT: movq 136(%rsp,%r9), %r13 -; AVX512-NEXT: shldq %cl, %rax, %r13 -; AVX512-NEXT: movq 80(%rsp,%r9), %r15 -; AVX512-NEXT: shldq %cl, %r15, %r10 -; AVX512-NEXT: movl %edx, %eax +; AVX512-NEXT: shrl $3, %esi +; AVX512-NEXT: andl $56, %esi +; AVX512-NEXT: negl %esi +; AVX512-NEXT: movslq %esi, %rsi +; 
AVX512-NEXT: movq 128(%rsp,%rsi), %r10 +; AVX512-NEXT: movq 136(%rsp,%rsi), %r12 +; AVX512-NEXT: movq %r12, %rax +; AVX512-NEXT: shldq %cl, %r10, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 160(%rsp,%rsi), %r14 +; AVX512-NEXT: movq 168(%rsp,%rsi), %rax +; AVX512-NEXT: shldq %cl, %r14, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 144(%rsp,%rsi), %r15 +; AVX512-NEXT: movq 152(%rsp,%rsi), %r11 +; AVX512-NEXT: movq %r11, %rbx +; AVX512-NEXT: shldq %cl, %r15, %rbx +; AVX512-NEXT: movq 120(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rax, %r10 +; AVX512-NEXT: shldq %cl, %r11, %r14 +; AVX512-NEXT: movq %rdi, %r9 +; AVX512-NEXT: movq 112(%rsp,%rsi), %r11 +; AVX512-NEXT: shldq %cl, %r12, %r15 +; AVX512-NEXT: movl %edx, %edx ; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX512-NEXT: vmovups %xmm1, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %xmm1, {{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) ; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movq (%rsp,%r9), %rbp -; AVX512-NEXT: movq 8(%rsp,%r9), %rsi -; AVX512-NEXT: shldq %cl, %rbp, %rsi -; AVX512-NEXT: movq -8(%rsp,%r9), %rdx -; AVX512-NEXT: shldq %cl, %rdx, %rbp -; AVX512-NEXT: movq -16(%rsp,%r9), %rax -; AVX512-NEXT: shldq %cl, %rax, %rdx -; AVX512-NEXT: andnq 56(%rdi), %r13, %r13 -; AVX512-NEXT: andnq 48(%rdi), %r12, %r12 -; AVX512-NEXT: orq %rsi, %r13 -; AVX512-NEXT: orq %rbp, %r12 -; AVX512-NEXT: andnq 40(%rdi), %r14, %r14 -; AVX512-NEXT: orq %rdx, %r14 -; AVX512-NEXT: movq -24(%rsp,%r9), %rsi -; AVX512-NEXT: shldq %cl, %rsi, %rax -; AVX512-NEXT: andnq 32(%rdi), %rbx, %rdx -; AVX512-NEXT: orq %rax, %rdx -; AVX512-NEXT: movq -32(%rsp,%r9), %rax -; AVX512-NEXT: shldq %cl, %rax, %rsi -; AVX512-NEXT: shlxq %rcx, %r15, %rbx -; AVX512-NEXT: andnq 24(%rdi), %r11, %r11 -; AVX512-NEXT: orq %rsi, %r11 -; AVX512-NEXT: movq -48(%rsp,%r9), %rsi -; AVX512-NEXT: movq -40(%rsp,%r9), %r9 -; AVX512-NEXT: shldq %cl, %r9, %rax -; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX512-NEXT: andnq 16(%rdi), %r15, %r15 +; AVX512-NEXT: movq 16(%rdi), %r12 +; AVX512-NEXT: movq 48(%rdi), %r13 +; AVX512-NEXT: movq 32(%rdi), %rbp +; AVX512-NEXT: andnq %rbp, %r15, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: andq %r15, %rbp +; AVX512-NEXT: andnq %r13, %r14, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: andq %r14, %r13 +; AVX512-NEXT: andnq %r12, %r10, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: andq %r10, %r12 +; AVX512-NEXT: movq 40(%rdi), %r8 +; AVX512-NEXT: orq %r13, %r12 +; AVX512-NEXT: andnq %r8, %rbx, %rdi +; AVX512-NEXT: andq %rbx, %r8 +; AVX512-NEXT: movq 56(%r9), %r13 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; AVX512-NEXT: andnq %r13, %rdx, %r10 +; AVX512-NEXT: andq %rdx, %r13 +; AVX512-NEXT: movq 24(%r9), %rax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; AVX512-NEXT: andnq %rax, %rdx, %r15 +; AVX512-NEXT: andq %rdx, %rax +; AVX512-NEXT: orq %r13, %rax +; AVX512-NEXT: shlxq %rcx, %r11, %r13 +; AVX512-NEXT: movq (%r9), %rdx +; AVX512-NEXT: andnq %rdx, %r13, %r14 +; AVX512-NEXT: andq %r13, %rdx +; 
AVX512-NEXT: orq %rbp, %rdx +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r11, %rbp +; AVX512-NEXT: orq %r12, %rdx +; AVX512-NEXT: movq 8(%r9), %r13 +; AVX512-NEXT: andnq %r13, %rbp, %rbx +; AVX512-NEXT: andq %rbp, %r13 +; AVX512-NEXT: orq %r8, %r13 +; AVX512-NEXT: movq 24(%rsp,%rsi), %r8 +; AVX512-NEXT: orq %rax, %r13 +; AVX512-NEXT: movq 32(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, %r12 +; AVX512-NEXT: shldq %cl, %r8, %r12 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; AVX512-NEXT: orq %r12, %r11 +; AVX512-NEXT: movq 40(%rsp,%rsi), %r12 +; AVX512-NEXT: shldq %cl, %rax, %r12 +; AVX512-NEXT: orq %r12, %r10 +; AVX512-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 8(%rsp,%rsi), %rax +; AVX512-NEXT: movq 16(%rsp,%rsi), %r12 +; AVX512-NEXT: movq %r12, %rbp +; AVX512-NEXT: shldq %cl, %rax, %rbp +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX512-NEXT: orq %rbp, %r10 +; AVX512-NEXT: shldq %cl, %r12, %r8 +; AVX512-NEXT: orq %r8, %rdi +; AVX512-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq -8(%rsp,%rsi), %r8 +; AVX512-NEXT: movq (%rsp,%rsi), %r12 +; AVX512-NEXT: movq %r12, %rbp +; AVX512-NEXT: shldq %cl, %r8, %rbp +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; AVX512-NEXT: orq %rbp, %rdi +; AVX512-NEXT: movq -16(%rsp,%rsi), %rsi +; AVX512-NEXT: shldq %cl, %r12, %rax ; AVX512-NEXT: orq %rax, %r15 ; AVX512-NEXT: shlxq %rcx, %rsi, %rax ; AVX512-NEXT: # kill: def $cl killed $cl killed $rcx -; AVX512-NEXT: shldq %cl, %rsi, %r9 -; AVX512-NEXT: andnq 8(%rdi), %r10, %rcx -; AVX512-NEXT: orq %r9, %rcx -; AVX512-NEXT: andnq (%rdi), %rbx, %rsi -; AVX512-NEXT: orq %rax, %rsi -; AVX512-NEXT: andl $60, %r8d -; AVX512-NEXT: movl (%rdi,%r8), %eax -; AVX512-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload -; AVX512-NEXT: btl %r8d, %eax -; AVX512-NEXT: movq %r13, 56(%rdi) -; AVX512-NEXT: movq %r12, 48(%rdi) -; AVX512-NEXT: movq %r14, 40(%rdi) -; AVX512-NEXT: movq %rdx, 32(%rdi) -; AVX512-NEXT: movq %r11, 24(%rdi) -; AVX512-NEXT: movq %r15, 16(%rdi) -; AVX512-NEXT: movq %rcx, 8(%rdi) -; AVX512-NEXT: movq %rsi, (%rdi) -; AVX512-NEXT: setae %al -; AVX512-NEXT: addq $152, %rsp +; AVX512-NEXT: shldq %cl, %rsi, %r8 +; AVX512-NEXT: orq %rax, %r14 +; AVX512-NEXT: orq %r8, %rbx +; AVX512-NEXT: orq %rdx, %r13 +; AVX512-NEXT: movq %r11, 48(%r9) +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: movq %rax, 56(%r9) +; AVX512-NEXT: movq %r10, 32(%r9) +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: movq %rax, 40(%r9) +; AVX512-NEXT: movq %rdi, 16(%r9) +; AVX512-NEXT: movq %r15, 24(%r9) +; AVX512-NEXT: movq %r14, (%r9) +; AVX512-NEXT: movq %rbx, 8(%r9) +; AVX512-NEXT: sete %al +; AVX512-NEXT: addq $184, %rsp ; AVX512-NEXT: popq %rbx ; AVX512-NEXT: popq %r12 ; AVX512-NEXT: popq %r13 @@ -1649,25 +4274,2749 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind { define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind { ; X86-LABEL: test_ne_i4096: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: andl $4064, %edx # imm = 0xFE0 -; X86-NEXT: shrl $3, %edx -; X86-NEXT: movl (%eax,%edx), %eax -; X86-NEXT: btl %ecx, %eax -; X86-NEXT: setb %al +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: 
pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $1792, %esp # imm = 0x700 +; X86-NEXT: movl 12(%ebp), %ebx +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: shrl $3, %ecx +; X86-NEXT: andl $508, %ecx # imm = 0x1FC +; X86-NEXT: leal {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: subl %ecx, %esi +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; 
X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; 
X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl 248(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 252(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl $31, %ebx +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 504(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 508(%esi), %edx +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 120(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 124(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 376(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 380(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 184(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 188(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 440(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 444(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 56(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 60(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 312(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 316(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 216(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 220(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 472(%esi), %edi +; X86-NEXT: movl 476(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 88(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 92(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 344(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 348(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 152(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 156(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 408(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X86-NEXT: movl 412(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 24(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 28(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 280(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 284(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 232(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 236(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 488(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 492(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 104(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 108(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 360(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 364(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 168(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 172(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 424(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 428(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 40(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 296(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 300(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 200(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 204(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 456(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 
460(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 72(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 76(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 328(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 332(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 136(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 140(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 392(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 396(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 12(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 264(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 268(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 240(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 244(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 496(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 500(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 112(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 116(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 368(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 372(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 176(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 180(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 432(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 436(%esi), %eax +; 
X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 48(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 52(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 304(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 308(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 208(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 212(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 464(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 468(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 80(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 84(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 336(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 340(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 144(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 148(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 400(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 404(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 16(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 20(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 272(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 276(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 224(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 228(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 480(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 484(%esi), %eax +; X86-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 96(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 100(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 352(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 356(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 160(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 164(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 416(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 420(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 32(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 36(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 288(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 292(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 192(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 196(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 448(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 452(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 64(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 68(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 320(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 324(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 128(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 132(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edi, %edx +; X86-NEXT: movl 256(%esi), %edi +; X86-NEXT: movl 260(%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: 
movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: shldl %cl, %edi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl 388(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl 4(%esi), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shrdl $1, %eax, %edi +; X86-NEXT: shrl %eax +; X86-NEXT: movl %ebx, %edx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: notb %cl +; X86-NEXT: shrdl %cl, %eax, %edi +; X86-NEXT: shrl %cl, %ebx +; X86-NEXT: movb $32, %cl +; X86-NEXT: testb %cl, %cl +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: jne .LBB20_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: .LBB20_2: +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shll %cl, %edx +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 320(%eax), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl 64(%eax), %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 448(%eax), %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 192(%eax), %ecx +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: orl %esi, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 288(%eax), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: 
andl 32(%eax), %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 416(%eax), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: andl 160(%eax), %edi +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 352(%eax), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl 96(%eax), %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 480(%eax), %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 224(%eax), %ecx +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: orl %esi, %ecx +; X86-NEXT: orl %edi, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 272(%eax), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 16(%eax), %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 400(%eax), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl 144(%eax), %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 336(%eax), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 80(%eax), %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 464(%eax), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: andl 208(%eax), %edi +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 304(%eax), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 48(%eax), %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 432(%eax), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl 176(%eax), %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 368(%eax), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 112(%eax), %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 496(%eax), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: andl 240(%eax), %ebx +; X86-NEXT: orl %ecx, %ebx +; X86-NEXT: orl %edx, %ebx +; X86-NEXT: orl %esi, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 264(%eax), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 8(%eax), %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 392(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 136(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 328(%ebx), %edx +; 
X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 72(%ebx), %eax +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: andl 456(%ebx), %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl 200(%ebx), %esi +; X86-NEXT: orl %edi, %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: andl 296(%ebx), %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 40(%ebx), %eax +; X86-NEXT: orl %edi, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: andl 424(%ebx), %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 168(%ebx), %edx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 360(%ebx), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 104(%ebx), %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 488(%ebx), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: andl 232(%ebx), %edi +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 280(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 24(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 408(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 152(%ebx), %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 344(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 88(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 472(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl 216(%ebx), %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 312(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 56(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 440(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 184(%ebx), %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 376(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 120(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 504(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: andl 248(%ebx), %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded 
Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 324(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 68(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 452(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 196(%ebx), %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 292(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 36(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 420(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 164(%ebx), %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 356(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 100(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 484(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl 228(%ebx), %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 276(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 20(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 404(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 148(%ebx), %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 340(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 84(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 468(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl 212(%ebx), %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 308(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 52(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 436(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 180(%ebx), %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 372(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 116(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 500(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: andl 244(%ebx), %edi 
+; X86-NEXT: orl %eax, %edi +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 268(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 12(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 396(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 140(%ebx), %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 332(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 76(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 460(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: andl 204(%ebx), %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 300(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 44(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 428(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 172(%ebx), %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 364(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 108(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 492(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: andl 236(%ebx), %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: orl %edi, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 284(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 28(%ebx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 412(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: andl 156(%ebx), %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 348(%ebx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 92(%ebx), %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 476(%ebx), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 220(%ebx), %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: orl %edx, %eax +; X86-NEXT: orl %edi, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 316(%ebx), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 60(%ebx), %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 444(%ebx), %ecx +; 
X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: andl 188(%ebx), %edi +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 380(%ebx), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl 124(%ebx), %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 508(%ebx), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: andl 252(%esi), %ebx +; X86-NEXT: orl %ecx, %ebx +; X86-NEXT: orl %edx, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: orl %eax, %ebx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: negl %ecx +; X86-NEXT: movl 1648(%esp,%ecx), %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl %cl, %edx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl 8(%ebp), %edx +; X86-NEXT: andl 128(%edx), %ecx +; X86-NEXT: andl 384(%edx), %edi +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %eax +; X86-NEXT: andl (%edx), %eax +; X86-NEXT: orl %eax, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 256(%edx), %eax +; X86-NEXT: orl %eax, %edi +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 260(%edx), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: andl 4(%edx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl 132(%edx), %eax +; X86-NEXT: andl 388(%edx), %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: orl %ebx, %esi +; X86-NEXT: orl %edi, %esi +; X86-NEXT: setne %al +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; -; X64-LABEL: test_ne_i4096: -; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: andl $4064, %eax # imm = 0xFE0 -; X64-NEXT: shrl $3, %eax -; X64-NEXT: movl (%rdi,%rax), %eax -; X64-NEXT: btl %esi, %eax -; X64-NEXT: setb %al -; X64-NEXT: retq +; SSE-LABEL: test_ne_i4096: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: subq $1576, %rsp # imm = 0x628 +; SSE-NEXT: movl %esi, %ecx +; 
SSE-NEXT: movl %esi, %eax +; SSE-NEXT: andl $4032, %eax # imm = 0xFC0 +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movq $0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movq $1, {{[0-9]+}}(%rsp) +; SSE-NEXT: andl $63, %ecx +; SSE-NEXT: shrl $3, %eax +; SSE-NEXT: negl %eax +; SSE-NEXT: movslq %eax, %rsi +; SSE-NEXT: movq 1296(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1304(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1552(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1560(%rsp,%rsi), %rax +; SSE-NEXT: shldq %cl, %rdx, %rax +; 
SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1168(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1176(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1424(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1432(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1232(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1240(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1488(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1496(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1104(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1112(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1360(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, (%rsp) # 8-byte Spill +; SSE-NEXT: movq 1368(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1264(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1272(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1520(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1528(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1136(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1144(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1392(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1400(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1200(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1208(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1456(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 
1464(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1072(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1080(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1328(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1336(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1280(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1288(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1536(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1544(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1152(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1160(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1408(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1416(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1216(%rsp,%rsi), %r11 +; SSE-NEXT: movq 1224(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %r11, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1472(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1480(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1088(%rsp,%rsi), %r9 +; SSE-NEXT: movq 1096(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %r9, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1344(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1352(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1248(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1256(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rax, %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1504(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 
1512(%rsp,%rsi), %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rdx, %rax +; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1120(%rsp,%rsi), %rax +; SSE-NEXT: movq 1128(%rsp,%rsi), %r8 +; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: shldq %cl, %rax, %r8 +; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1376(%rsp,%rsi), %r13 +; SSE-NEXT: movq 1384(%rsp,%rsi), %rbx +; SSE-NEXT: movq %rbx, %r8 +; SSE-NEXT: shldq %cl, %r13, %r8 +; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1184(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1192(%rsp,%rsi), %r15 +; SSE-NEXT: movq %r15, %r14 +; SSE-NEXT: shldq %cl, %rdx, %r14 +; SSE-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1440(%rsp,%rsi), %r10 +; SSE-NEXT: movq 1448(%rsp,%rsi), %rdx +; SSE-NEXT: movq %rdx, %r14 +; SSE-NEXT: shldq %cl, %r10, %r14 +; SSE-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1312(%rsp,%rsi), %r14 +; SSE-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 1320(%rsp,%rsi), %rbp +; SSE-NEXT: movq %rbp, %r12 +; SSE-NEXT: shldq %cl, %r14, %r12 +; SSE-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r8, (%rsp) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq 1064(%rsp,%rsi), %rbx +; SSE-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; SSE-NEXT: shldq %cl, %rbp, %r14 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte 
Reload +; SSE-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; SSE-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; SSE-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; SSE-NEXT: shldq %cl, %rdx, %r11 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r15, %rdx +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r15, %r9 +; SSE-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r15, %r8 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r15, %rbp +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r15, %r9 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r15, %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r15, %r13 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r12, %r15 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; SSE-NEXT: shldq %cl, %r12, %r10 +; SSE-NEXT: andq 384(%rdi), %r10 +; SSE-NEXT: andq 128(%rdi), %r15 +; SSE-NEXT: andq 320(%rdi), %r13 +; SSE-NEXT: andq 64(%rdi), %rax +; SSE-NEXT: orq %r10, %r15 +; SSE-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: orq %r13, %rax +; SSE-NEXT: andq 448(%rdi), %r9 +; SSE-NEXT: andq 192(%rdi), %rbp +; SSE-NEXT: orq %r9, %rbp +; SSE-NEXT: orq %rax, %rbp +; SSE-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: andq 288(%rdi), %r8 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; SSE-NEXT: andq 32(%rdi), %r9 +; SSE-NEXT: andq 416(%rdi), %rdx +; SSE-NEXT: andq 160(%rdi), %r11 +; SSE-NEXT: orq %r8, %r9 +; SSE-NEXT: orq %rdx, %r11 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; SSE-NEXT: andq 352(%rdi), %rdx +; SSE-NEXT: orq %r9, %r11 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 96(%rdi), %rax +; SSE-NEXT: orq %rdx, %rax +; SSE-NEXT: movq %rax, %rdx +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 480(%rdi), %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: andq 224(%rdi), %r8 +; SSE-NEXT: orq %rax, %r8 +; SSE-NEXT: orq %rdx, %r8 +; SSE-NEXT: andq 272(%rdi), %r14 +; SSE-NEXT: orq %r11, %r8 +; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 16(%rdi), %rax +; SSE-NEXT: orq %r14, %rax +; SSE-NEXT: movq %rax, %r8 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; SSE-NEXT: andq 400(%rdi), %rdx +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 144(%rdi), %rax +; SSE-NEXT: orq %rdx, %rax +; SSE-NEXT: orq %r8, %rax +; SSE-NEXT: movq %rax, %r8 +; SSE-NEXT: movq 
{{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; SSE-NEXT: andq 336(%rdi), %r9 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 80(%rdi), %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; SSE-NEXT: andq 464(%rdi), %rdx +; SSE-NEXT: orq %r9, %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; SSE-NEXT: andq 208(%rdi), %r11 +; SSE-NEXT: orq %rdx, %r11 +; SSE-NEXT: orq %rax, %r11 +; SSE-NEXT: orq %r8, %r11 +; SSE-NEXT: movq (%rsp), %rdx # 8-byte Reload +; SSE-NEXT: andq 304(%rdi), %rdx +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 48(%rdi), %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; SSE-NEXT: andq 432(%rdi), %r9 +; SSE-NEXT: orq %rdx, %rax +; SSE-NEXT: movq %rax, %r10 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: andq 176(%rdi), %r8 +; SSE-NEXT: orq %r9, %r8 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; SSE-NEXT: andq 368(%rdi), %r9 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 112(%rdi), %rax +; SSE-NEXT: orq %r10, %r8 +; SSE-NEXT: movq %r8, %r10 +; SSE-NEXT: orq %r9, %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: andq 496(%rdi), %r8 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; SSE-NEXT: andq 240(%rdi), %rbp +; SSE-NEXT: orq %r8, %rbp +; SSE-NEXT: orq %rax, %rbp +; SSE-NEXT: orq %r10, %rbp +; SSE-NEXT: orq %r11, %rbp +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 392(%rdi), %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; SSE-NEXT: andq 136(%rdi), %r12 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; SSE-NEXT: andq 328(%rdi), %rdx +; SSE-NEXT: orq %rax, %r12 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 72(%rdi), %rax +; SSE-NEXT: orq %rdx, %rax +; SSE-NEXT: movq %rax, %rdx +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 456(%rdi), %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; SSE-NEXT: andq 200(%rdi), %r13 +; SSE-NEXT: orq %rax, %r13 +; SSE-NEXT: orq %rdx, %r13 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; SSE-NEXT: andq 296(%rdi), %rdx +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 40(%rdi), %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: andq 424(%rdi), %r8 +; SSE-NEXT: orq %rdx, %rax +; SSE-NEXT: movq %rax, %r9 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; SSE-NEXT: andq 168(%rdi), %rdx +; SSE-NEXT: orq %r8, %rdx +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: andq 360(%rdi), %r8 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 104(%rdi), %rax +; SSE-NEXT: orq %r9, %rdx +; SSE-NEXT: orq %r8, %rax +; SSE-NEXT: movq %rax, %r8 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 488(%rdi), %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: andq 232(%rdi), %r15 +; SSE-NEXT: orq %rax, %r15 +; SSE-NEXT: orq %r8, %r15 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: andq 280(%rdi), %r8 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 24(%rdi), %rax +; SSE-NEXT: orq %rdx, %r15 +; SSE-NEXT: orq %r8, %rax +; SSE-NEXT: movq %rax, %r10 +; 
SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: andq 408(%rdi), %r8 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 152(%rdi), %rax +; SSE-NEXT: orq %r8, %rax +; SSE-NEXT: orq %r10, %rax +; SSE-NEXT: movq %rax, %r10 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; SSE-NEXT: andq 344(%rdi), %r11 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: andq 88(%rdi), %r8 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 472(%rdi), %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; SSE-NEXT: andq 216(%rdi), %r14 +; SSE-NEXT: orq %r11, %r8 +; SSE-NEXT: orq %rax, %r14 +; SSE-NEXT: orq %r8, %r14 +; SSE-NEXT: orq %r10, %r14 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; SSE-NEXT: andq 312(%rdi), %r11 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; SSE-NEXT: andq 56(%rdi), %r10 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: andq 440(%rdi), %r8 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; SSE-NEXT: andq 184(%rdi), %r9 +; SSE-NEXT: orq %r11, %r10 +; SSE-NEXT: orq %r8, %r9 +; SSE-NEXT: orq %r10, %r9 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; SSE-NEXT: shldq %cl, %rax, %rdx +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; SSE-NEXT: andq 376(%rdi), %r10 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT: andq 120(%rdi), %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; SSE-NEXT: andq 504(%rdi), %r11 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; SSE-NEXT: andq 248(%rdi), %r8 +; SSE-NEXT: orq %r10, %rax +; SSE-NEXT: movq %rax, %r10 +; SSE-NEXT: orq %r11, %r8 +; SSE-NEXT: movq 1056(%rsp,%rsi), %rax +; SSE-NEXT: shldq %cl, %rax, %rbx +; SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; SSE-NEXT: shlq %cl, %rax +; SSE-NEXT: orq %r10, %r8 +; SSE-NEXT: orq %r9, %r8 +; SSE-NEXT: andq 256(%rdi), %rdx +; SSE-NEXT: orq %r14, %r8 +; SSE-NEXT: andq (%rdi), %rax +; SSE-NEXT: orq %rdx, %rax +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; SSE-NEXT: orq %rbp, %rax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; SSE-NEXT: andq 264(%rdi), %rcx +; SSE-NEXT: andq 8(%rdi), %rbx +; SSE-NEXT: orq %rcx, %rbx +; SSE-NEXT: orq %r12, %rbx +; SSE-NEXT: orq %r13, %rbx +; SSE-NEXT: orq %r15, %rbx +; SSE-NEXT: orq %r8, %rbx +; SSE-NEXT: orq %rax, %rbx +; SSE-NEXT: setne %al +; SSE-NEXT: addq $1576, %rsp # imm = 0x628 +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ne_i4096: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: subq $1560, %rsp # imm = 0x618 +; AVX2-NEXT: movl %esi, %ecx +; AVX2-NEXT: movl %esi, %eax +; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, 
{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: andl $4032, %eax # imm = 0xFC0 +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0] +; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX2-NEXT: andl $63, %ecx +; AVX2-NEXT: shrl $3, %eax +; AVX2-NEXT: negl %eax +; AVX2-NEXT: movslq %eax, %rsi +; AVX2-NEXT: movq 1280(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1288(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1536(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1544(%rsp,%rsi), %rax +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1152(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1160(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1408(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1416(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1216(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, (%rsp) # 8-byte Spill +; AVX2-NEXT: movq 1224(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1472(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1480(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1088(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1096(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, 
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1344(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1352(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1248(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1256(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1504(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1512(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1120(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1128(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1376(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1384(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1184(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1192(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1440(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1448(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1056(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1064(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1312(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1320(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1264(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1272(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1520(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1528(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1136(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, 
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1144(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1392(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1400(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1200(%rsp,%rsi), %r11 +; AVX2-NEXT: movq 1208(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %r11, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1456(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1464(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1072(%rsp,%rsi), %r12 +; AVX2-NEXT: movq 1080(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %r12, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1328(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1336(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rdx, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1232(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1240(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rax, %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1488(%rsp,%rsi), %rbp +; AVX2-NEXT: movq 1496(%rsp,%rsi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rbp, %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1104(%rsp,%rsi), %rax +; AVX2-NEXT: movq 1112(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: shldq %cl, %rax, %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1360(%rsp,%rsi), %r10 +; AVX2-NEXT: movq 1368(%rsp,%rsi), %r8 +; AVX2-NEXT: movq %r8, %rdx +; AVX2-NEXT: shldq %cl, %r10, %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1168(%rsp,%rsi), %r9 +; AVX2-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1176(%rsp,%rsi), %rbx +; AVX2-NEXT: movq %rbx, %rdx +; AVX2-NEXT: shldq %cl, %r9, %rdx +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1424(%rsp,%rsi), %r9 +; AVX2-NEXT: movq 1432(%rsp,%rsi), %rdx +; AVX2-NEXT: movq %rdx, %r14 +; AVX2-NEXT: shldq %cl, %r9, %r14 +; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1296(%rsp,%rsi), %r15 +; AVX2-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 1304(%rsp,%rsi), %r14 +; AVX2-NEXT: movq %r14, %r13 +; AVX2-NEXT: shldq %cl, %r15, %r13 +; AVX2-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX2-NEXT: shldq 
%cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r15, (%rsp) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq 1048(%rsp,%rsi), %rdx +; AVX2-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: shldq %cl, %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; AVX2-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; AVX2-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; AVX2-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r14, %rbx +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r14, %r11 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r14, %r12 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r14, %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r14, %r13 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r14, %rbp +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r14, %rax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r14, %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 
8-byte Reload +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r15, %r14 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r15, %r9 +; AVX2-NEXT: andq 384(%rdi), %r9 +; AVX2-NEXT: andq 128(%rdi), %r14 +; AVX2-NEXT: andq 320(%rdi), %r10 +; AVX2-NEXT: orq %r9, %r14 +; AVX2-NEXT: movq %r14, %r15 +; AVX2-NEXT: andq 64(%rdi), %rax +; AVX2-NEXT: orq %r10, %rax +; AVX2-NEXT: andq 448(%rdi), %rbp +; AVX2-NEXT: andq 192(%rdi), %r13 +; AVX2-NEXT: orq %rbp, %r13 +; AVX2-NEXT: orq %rax, %r13 +; AVX2-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: andq 288(%rdi), %r8 +; AVX2-NEXT: andq 32(%rdi), %r12 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 416(%rdi), %rax +; AVX2-NEXT: orq %r8, %r12 +; AVX2-NEXT: andq 160(%rdi), %r11 +; AVX2-NEXT: orq %rax, %r11 +; AVX2-NEXT: andq 352(%rdi), %rbx +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 96(%rdi), %rax +; AVX2-NEXT: orq %r12, %r11 +; AVX2-NEXT: orq %rbx, %rax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX2-NEXT: andq 480(%rdi), %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX2-NEXT: andq 224(%rdi), %r13 +; AVX2-NEXT: orq %r10, %r13 +; AVX2-NEXT: orq %rax, %r13 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 272(%rdi), %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 16(%rdi), %rax +; AVX2-NEXT: orq %r11, %r13 +; AVX2-NEXT: orq %r8, %rax +; AVX2-NEXT: movq %rax, %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; AVX2-NEXT: andq 400(%rdi), %r9 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 144(%rdi), %rax +; AVX2-NEXT: orq %r9, %rax +; AVX2-NEXT: orq %r8, %rax +; AVX2-NEXT: movq %rax, %r9 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX2-NEXT: andq 336(%rdi), %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 80(%rdi), %rax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 464(%rdi), %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; AVX2-NEXT: andq 208(%rdi), %r11 +; AVX2-NEXT: orq %r10, %rax +; AVX2-NEXT: orq %r8, %r11 +; AVX2-NEXT: orq %rax, %r11 +; AVX2-NEXT: orq %r9, %r11 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; AVX2-NEXT: andq 304(%rdi), %r9 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 48(%rdi), %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX2-NEXT: andq 432(%rdi), %r10 +; AVX2-NEXT: movq (%rsp), %rax # 8-byte Reload +; AVX2-NEXT: andq 176(%rdi), %rax +; AVX2-NEXT: orq %r9, %r8 +; AVX2-NEXT: movq %r8, %r9 +; AVX2-NEXT: orq %r10, %rax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 368(%rdi), %r8 +; AVX2-NEXT: orq %r9, %rax +; AVX2-NEXT: movq %rax, %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 112(%rdi), %rax +; AVX2-NEXT: orq %r8, %rax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 496(%rdi), %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; AVX2-NEXT: andq 240(%rdi), %r9 +; AVX2-NEXT: orq %r8, %r9 +; AVX2-NEXT: orq %rax, %r9 +; AVX2-NEXT: orq %r10, %r9 +; AVX2-NEXT: orq %r11, %r9 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte 
Reload +; AVX2-NEXT: andq 392(%rdi), %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; AVX2-NEXT: andq 136(%rdi), %rbp +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 328(%rdi), %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 72(%rdi), %rax +; AVX2-NEXT: orq %r10, %rbp +; AVX2-NEXT: orq %r8, %rax +; AVX2-NEXT: movq %rax, %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 456(%rdi), %rax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; AVX2-NEXT: andq 200(%rdi), %r12 +; AVX2-NEXT: orq %rax, %r12 +; AVX2-NEXT: orq %r8, %r12 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX2-NEXT: andq 296(%rdi), %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 40(%rdi), %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; AVX2-NEXT: andq 424(%rdi), %r11 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 168(%rdi), %rax +; AVX2-NEXT: orq %r10, %r8 +; AVX2-NEXT: movq %r8, %r10 +; AVX2-NEXT: orq %r11, %rax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 360(%rdi), %r8 +; AVX2-NEXT: orq %r10, %rax +; AVX2-NEXT: movq %rax, %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 104(%rdi), %rax +; AVX2-NEXT: orq %r8, %rax +; AVX2-NEXT: movq %rax, %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 488(%rdi), %rax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; AVX2-NEXT: andq 232(%rdi), %r14 +; AVX2-NEXT: orq %rax, %r14 +; AVX2-NEXT: orq %r8, %r14 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 280(%rdi), %r8 +; AVX2-NEXT: orq %r10, %r14 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 24(%rdi), %rax +; AVX2-NEXT: orq %r8, %rax +; AVX2-NEXT: movq %rax, %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 408(%rdi), %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 152(%rdi), %rax +; AVX2-NEXT: orq %r8, %rax +; AVX2-NEXT: orq %r10, %rax +; AVX2-NEXT: movq %rax, %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; AVX2-NEXT: andq 344(%rdi), %r11 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 88(%rdi), %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 472(%rdi), %rax +; AVX2-NEXT: orq %r11, %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; AVX2-NEXT: andq 216(%rdi), %rbx +; AVX2-NEXT: orq %rax, %rbx +; AVX2-NEXT: orq %r8, %rbx +; AVX2-NEXT: orq %r10, %rbx +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 312(%rdi), %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 56(%rdi), %rax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX2-NEXT: andq 440(%rdi), %r10 +; AVX2-NEXT: orq %r8, %rax +; AVX2-NEXT: movq %rax, %r11 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 184(%rdi), %r8 +; AVX2-NEXT: orq %r10, %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX2-NEXT: andq 376(%rdi), %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 120(%rdi), %rax +; AVX2-NEXT: orq %r11, %r8 +; 
AVX2-NEXT: movq %r8, %r11 +; AVX2-NEXT: orq %r10, %rax +; AVX2-NEXT: movq %rax, %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: andq 504(%rdi), %r8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: andq 248(%rdi), %rax +; AVX2-NEXT: orq %r8, %rax +; AVX2-NEXT: orq %r10, %rax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX2-NEXT: shldq %cl, %r8, %r10 +; AVX2-NEXT: orq %r11, %rax +; AVX2-NEXT: movq 1040(%rsp,%rsi), %rsi +; AVX2-NEXT: orq %rbx, %rax +; AVX2-NEXT: movq %rax, %r8 +; AVX2-NEXT: shlxq %rcx, %rsi, %rax +; AVX2-NEXT: andq 256(%rdi), %r10 +; AVX2-NEXT: andq (%rdi), %rax +; AVX2-NEXT: orq %r10, %rax +; AVX2-NEXT: orq %r15, %rax +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; AVX2-NEXT: orq %r13, %rax +; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx +; AVX2-NEXT: shldq %cl, %rsi, %rdx +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: andq 264(%rdi), %rcx +; AVX2-NEXT: andq 8(%rdi), %rdx +; AVX2-NEXT: orq %r9, %rax +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: orq %rbp, %rdx +; AVX2-NEXT: orq %r12, %rdx +; AVX2-NEXT: orq %r14, %rdx +; AVX2-NEXT: orq %r8, %rdx +; AVX2-NEXT: orq %rax, %rdx +; AVX2-NEXT: setne %al +; AVX2-NEXT: addq $1560, %rsp # imm = 0x618 +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ne_i4096: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: subq $1560, %rsp # imm = 0x618 +; AVX512-NEXT: movl %esi, %ecx +; AVX512-NEXT: movl %esi, %eax +; AVX512-NEXT: andl $4032, %eax # imm = 0xFC0 +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0] +; AVX512-NEXT: 
vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: andl $63, %ecx +; AVX512-NEXT: shrl $3, %eax +; AVX512-NEXT: negl %eax +; AVX512-NEXT: movslq %eax, %rsi +; AVX512-NEXT: movq 1280(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1288(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1536(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1544(%rsp,%rsi), %rax +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1152(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1160(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1408(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1416(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1216(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, (%rsp) # 8-byte Spill +; AVX512-NEXT: movq 1224(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1472(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1480(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1088(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1096(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1344(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1352(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1248(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1256(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1504(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1512(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1120(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1128(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: 
shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1376(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1384(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1184(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1192(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1440(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1448(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1056(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1064(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1312(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1320(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1264(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1272(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1520(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1528(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1136(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1144(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1392(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1400(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1200(%rsp,%rsi), %r10 +; AVX512-NEXT: movq 1208(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %r10, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1456(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1464(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq 
%rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1072(%rsp,%rsi), %r14 +; AVX512-NEXT: movq 1080(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %r14, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1328(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1336(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rdx, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1232(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1240(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rax, %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1488(%rsp,%rsi), %r12 +; AVX512-NEXT: movq 1496(%rsp,%rsi), %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %r12, %rax +; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1104(%rsp,%rsi), %rax +; AVX512-NEXT: movq 1112(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: shldq %cl, %rax, %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1360(%rsp,%rsi), %r11 +; AVX512-NEXT: movq 1368(%rsp,%rsi), %rbx +; AVX512-NEXT: movq %rbx, %rdx +; AVX512-NEXT: shldq %cl, %r11, %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1168(%rsp,%rsi), %r9 +; AVX512-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1176(%rsp,%rsi), %r8 +; AVX512-NEXT: movq %r8, %rdx +; AVX512-NEXT: shldq %cl, %r9, %rdx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1424(%rsp,%rsi), %r9 +; AVX512-NEXT: movq 1432(%rsp,%rsi), %rdx +; AVX512-NEXT: movq %rdx, %r15 +; AVX512-NEXT: shldq %cl, %r9, %r15 +; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1296(%rsp,%rsi), %rbp +; AVX512-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 1304(%rsp,%rsi), %r15 +; AVX512-NEXT: movq %r15, %r13 +; AVX512-NEXT: shldq %cl, %rbp, %r13 +; AVX512-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r13, (%rsp) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), 
%r13 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq 1048(%rsp,%rsi), %rdx +; AVX512-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; AVX512-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; AVX512-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; AVX512-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r15, %rbx +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r15, %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r15, %r14 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r15, %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r15, %r13 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r15, %r12 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r15, %rax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %r15, %r11 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; AVX512-NEXT: shldq %cl, %rbp, %r15 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; AVX512-NEXT: shldq %cl, %rbp, %r9 +; AVX512-NEXT: andq 384(%rdi), %r9 +; AVX512-NEXT: andq 128(%rdi), %r15 +; AVX512-NEXT: orq %r9, %r15 +; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: andq 320(%rdi), %r11 +; AVX512-NEXT: andq 64(%rdi), %rax +; AVX512-NEXT: orq %r11, %rax +; AVX512-NEXT: andq 448(%rdi), %r12 +; AVX512-NEXT: andq 192(%rdi), %r13 +; AVX512-NEXT: orq %r12, %r13 +; AVX512-NEXT: orq %rax, %r13 +; AVX512-NEXT: andq 288(%rdi), %r8 +; AVX512-NEXT: andq 32(%rdi), %r14 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 416(%rdi), %rax +; AVX512-NEXT: orq %r8, %r14 +; AVX512-NEXT: andq 
160(%rdi), %r10 +; AVX512-NEXT: orq %rax, %r10 +; AVX512-NEXT: andq 352(%rdi), %rbx +; AVX512-NEXT: orq %r14, %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 96(%rdi), %rax +; AVX512-NEXT: orq %rbx, %rax +; AVX512-NEXT: movq %rax, %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 480(%rdi), %rax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; AVX512-NEXT: andq 224(%rdi), %r15 +; AVX512-NEXT: orq %rax, %r15 +; AVX512-NEXT: orq %r8, %r15 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: andq 272(%rdi), %r8 +; AVX512-NEXT: orq %r10, %r15 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 16(%rdi), %rax +; AVX512-NEXT: orq %r8, %rax +; AVX512-NEXT: movq %rax, %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; AVX512-NEXT: andq 400(%rdi), %r9 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 144(%rdi), %rax +; AVX512-NEXT: orq %r9, %rax +; AVX512-NEXT: orq %r8, %rax +; AVX512-NEXT: movq %rax, %r9 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX512-NEXT: andq 336(%rdi), %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 80(%rdi), %rax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: andq 464(%rdi), %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; AVX512-NEXT: andq 208(%rdi), %r11 +; AVX512-NEXT: orq %r10, %rax +; AVX512-NEXT: orq %r8, %r11 +; AVX512-NEXT: orq %rax, %r11 +; AVX512-NEXT: orq %r9, %r11 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX512-NEXT: andq 304(%rdi), %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 48(%rdi), %rax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; AVX512-NEXT: andq 432(%rdi), %r9 +; AVX512-NEXT: movq (%rsp), %r8 # 8-byte Reload +; AVX512-NEXT: andq 176(%rdi), %r8 +; AVX512-NEXT: orq %r10, %rax +; AVX512-NEXT: movq %rax, %r10 +; AVX512-NEXT: orq %r9, %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; AVX512-NEXT: andq 368(%rdi), %r9 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 112(%rdi), %rax +; AVX512-NEXT: orq %r10, %r8 +; AVX512-NEXT: movq %r8, %r10 +; AVX512-NEXT: orq %r9, %rax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: andq 496(%rdi), %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; AVX512-NEXT: andq 240(%rdi), %r9 +; AVX512-NEXT: orq %r8, %r9 +; AVX512-NEXT: orq %rax, %r9 +; AVX512-NEXT: orq %r10, %r9 +; AVX512-NEXT: orq %r11, %r9 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX512-NEXT: andq 392(%rdi), %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; AVX512-NEXT: andq 136(%rdi), %rbp +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: andq 328(%rdi), %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 72(%rdi), %rax +; AVX512-NEXT: orq %r10, %rbp +; AVX512-NEXT: orq %r8, %rax +; AVX512-NEXT: movq %rax, %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 456(%rdi), %rax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; AVX512-NEXT: andq 200(%rdi), %r12 +; AVX512-NEXT: orq %rax, %r12 +; 
AVX512-NEXT: orq %r8, %r12 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: andq 296(%rdi), %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 40(%rdi), %rax +; AVX512-NEXT: orq %r8, %rax +; AVX512-NEXT: movq %rax, %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: andq 424(%rdi), %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 168(%rdi), %rax +; AVX512-NEXT: orq %r8, %rax +; AVX512-NEXT: orq %r10, %rax +; AVX512-NEXT: movq %rax, %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: andq 360(%rdi), %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 104(%rdi), %rax +; AVX512-NEXT: orq %r8, %rax +; AVX512-NEXT: movq %rax, %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 488(%rdi), %rax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; AVX512-NEXT: andq 232(%rdi), %r14 +; AVX512-NEXT: orq %rax, %r14 +; AVX512-NEXT: orq %r8, %r14 +; AVX512-NEXT: orq %r10, %r14 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: andq 280(%rdi), %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 24(%rdi), %rax +; AVX512-NEXT: orq %r8, %rax +; AVX512-NEXT: movq %rax, %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: andq 408(%rdi), %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 152(%rdi), %rax +; AVX512-NEXT: orq %r8, %rax +; AVX512-NEXT: orq %r10, %rax +; AVX512-NEXT: movq %rax, %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; AVX512-NEXT: andq 344(%rdi), %r11 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: andq 88(%rdi), %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 472(%rdi), %rax +; AVX512-NEXT: orq %r11, %r8 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; AVX512-NEXT: andq 216(%rdi), %rbx +; AVX512-NEXT: orq %rax, %rbx +; AVX512-NEXT: orq %r8, %rbx +; AVX512-NEXT: orq %r10, %rbx +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX512-NEXT: andq 312(%rdi), %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 56(%rdi), %rax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: andq 440(%rdi), %r8 +; AVX512-NEXT: orq %r10, %rax +; AVX512-NEXT: movq %rax, %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 184(%rdi), %rax +; AVX512-NEXT: orq %r8, %rax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: andq 376(%rdi), %r8 +; AVX512-NEXT: orq %r10, %rax +; AVX512-NEXT: movq %rax, %r11 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 120(%rdi), %rax +; AVX512-NEXT: orq %r8, %rax +; AVX512-NEXT: movq %rax, %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 504(%rdi), %rax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: andq 248(%rdi), %r8 +; AVX512-NEXT: orq %rax, %r8 +; AVX512-NEXT: orq %r10, %r8 +; AVX512-NEXT: orq %r11, %r8 +; AVX512-NEXT: movq 1040(%rsp,%rsi), %rax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; AVX512-NEXT: movq 
{{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX512-NEXT: shldq %cl, %rsi, %r10 +; AVX512-NEXT: orq %rbx, %r8 +; AVX512-NEXT: shlxq %rcx, %rax, %rsi +; AVX512-NEXT: andq 256(%rdi), %r10 +; AVX512-NEXT: andq (%rdi), %rsi +; AVX512-NEXT: orq %r10, %rsi +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; AVX512-NEXT: orq %r13, %rsi +; AVX512-NEXT: orq %r15, %rsi +; AVX512-NEXT: # kill: def $cl killed $cl killed $rcx +; AVX512-NEXT: shldq %cl, %rax, %rdx +; AVX512-NEXT: orq %r9, %rsi +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: andq 264(%rdi), %rax +; AVX512-NEXT: andq 8(%rdi), %rdx +; AVX512-NEXT: orq %rax, %rdx +; AVX512-NEXT: orq %rbp, %rdx +; AVX512-NEXT: orq %r12, %rdx +; AVX512-NEXT: orq %r14, %rdx +; AVX512-NEXT: orq %r8, %rdx +; AVX512-NEXT: orq %rsi, %rdx +; AVX512-NEXT: setne %al +; AVX512-NEXT: addq $1560, %rsp # imm = 0x618 +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %rem = and i32 %position, 4095 %ofs = zext nneg i32 %rem to i4096 %bit = shl nuw i4096 1, %ofs @@ -1812,8 +7161,8 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $64, %esp -; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: subl $80, %esp +; X86-NEXT: movzbl 12(%ebp), %ecx ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -1826,41 +7175,52 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind { ; X86-NEXT: shrb $3, %al ; X86-NEXT: andb $12, %al ; X86-NEXT: negb %al -; X86-NEXT: movsbl %al, %edi -; X86-NEXT: movl 36(%esp,%edi), %edx -; X86-NEXT: movl 40(%esp,%edi), %ebx -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: shldl %cl, %edx, %esi -; X86-NEXT: movl 32(%esp,%edi), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 44(%esp,%edi), %edi -; X86-NEXT: shldl %cl, %ebx, %edi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shll %cl, %ebx -; X86-NEXT: notl %ebx -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl (%eax), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 12(%ebp), %eax -; X86-NEXT: andl $96, %eax -; X86-NEXT: shrl $3, %eax -; X86-NEXT: movl 8(%ebp), %ecx -; X86-NEXT: movl (%ecx,%eax), %eax -; X86-NEXT: andl %ebx, (%ecx) -; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movsbl %al, %eax +; X86-NEXT: movl 56(%esp,%eax), %esi +; X86-NEXT: movl 60(%esp,%eax), %edx +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 48(%esp,%eax), %edi +; X86-NEXT: movl 52(%esp,%eax), %eax +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edi, %eax +; X86-NEXT: movl 8(%ebp), %ebx +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl 8(%ebx), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %esi, %ecx +; X86-NEXT: movl (%ebx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %edi, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: movl 12(%ebx), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; 
X86-NEXT: movl %eax, %edx +; X86-NEXT: movl 4(%ebx), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl %ebx, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: notl %ecx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: shldl %cl, %ebx, %edx +; X86-NEXT: notl %ebx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X86-NEXT: notl %edx -; X86-NEXT: movl 8(%ebp), %ebx -; X86-NEXT: andl %edx, 4(%ebx) -; X86-NEXT: notl %esi -; X86-NEXT: andl %esi, 8(%ebx) +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: notl %edi -; X86-NEXT: andl %edi, 12(%ebx) -; X86-NEXT: btl %ecx, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: jae .LBB22_2 +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: orl %esi, %eax +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl (%eax), %eax +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl %ebx, 8(%esi) +; X86-NEXT: movl %ecx, 12(%esi) +; X86-NEXT: movl %edi, (%esi) +; X86-NEXT: movl %edx, 4(%esi) +; X86-NEXT: je .LBB22_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: .LBB22_2: @@ -1882,75 +7242,52 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind { ; SSE-NEXT: testb $64, %cl ; SSE-NEXT: cmovneq %rsi, %r8 ; SSE-NEXT: cmovneq %rax, %rsi +; SSE-NEXT: movq (%rdi), %rcx +; SSE-NEXT: movq 8(%rdi), %r9 +; SSE-NEXT: movq %r9, %r10 +; SSE-NEXT: andq %r8, %r10 ; SSE-NEXT: notq %r8 +; SSE-NEXT: movq %rcx, %r11 +; SSE-NEXT: andq %rsi, %r11 ; SSE-NEXT: notq %rsi -; SSE-NEXT: movl %ecx, %r9d -; SSE-NEXT: andl $96, %r9d -; SSE-NEXT: shrl $3, %r9d -; SSE-NEXT: movl (%rdi,%r9), %r9d -; SSE-NEXT: btl %ecx, %r9d -; SSE-NEXT: jb .LBB22_2 +; SSE-NEXT: andq %r9, %r8 +; SSE-NEXT: andq %rcx, %rsi +; SSE-NEXT: orq %r10, %r11 +; SSE-NEXT: jne .LBB22_2 ; SSE-NEXT: # %bb.1: ; SSE-NEXT: movl (%rdx), %eax ; SSE-NEXT: .LBB22_2: -; SSE-NEXT: andq %r8, 8(%rdi) -; SSE-NEXT: andq %rsi, (%rdi) +; SSE-NEXT: movq %rsi, (%rdi) +; SSE-NEXT: movq %r8, 8(%rdi) ; SSE-NEXT: # kill: def $eax killed $eax killed $rax ; SSE-NEXT: retq ; -; AVX2-LABEL: reset_multiload_i128: -; AVX2: # %bb.0: -; AVX2-NEXT: movl %esi, %ecx -; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: movl $1, %r8d -; AVX2-NEXT: xorl %esi, %esi -; AVX2-NEXT: shldq %cl, %r8, %rsi -; AVX2-NEXT: shlxq %rcx, %r8, %r8 -; AVX2-NEXT: testb $64, %cl -; AVX2-NEXT: cmovneq %r8, %rsi -; AVX2-NEXT: cmovneq %rax, %r8 -; AVX2-NEXT: notq %rsi -; AVX2-NEXT: notq %r8 -; AVX2-NEXT: movl %ecx, %r9d -; AVX2-NEXT: andl $96, %r9d -; AVX2-NEXT: shrl $3, %r9d -; AVX2-NEXT: movl (%rdi,%r9), %r9d -; AVX2-NEXT: btl %ecx, %r9d -; AVX2-NEXT: jb .LBB22_2 -; AVX2-NEXT: # %bb.1: -; AVX2-NEXT: movl (%rdx), %eax -; AVX2-NEXT: .LBB22_2: -; AVX2-NEXT: andq %rsi, 8(%rdi) -; AVX2-NEXT: andq %r8, (%rdi) -; AVX2-NEXT: # kill: def $eax killed $eax killed $rax -; AVX2-NEXT: retq -; -; AVX512-LABEL: reset_multiload_i128: -; AVX512: # %bb.0: -; AVX512-NEXT: movl %esi, %ecx -; AVX512-NEXT: movl $1, %r8d -; AVX512-NEXT: xorl %esi, %esi -; AVX512-NEXT: shldq %cl, %r8, %rsi -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: shlxq %rcx, %r8, %r8 -; AVX512-NEXT: testb $64, %cl -; AVX512-NEXT: cmovneq %r8, %rsi -; AVX512-NEXT: cmovneq %rax, %r8 -; AVX512-NEXT: notq %rsi -; AVX512-NEXT: notq %r8 -; AVX512-NEXT: movl %ecx, %r9d -; AVX512-NEXT: andl $96, %r9d 
-; AVX512-NEXT: shrl $3, %r9d -; AVX512-NEXT: movl (%rdi,%r9), %r9d -; AVX512-NEXT: btl %ecx, %r9d -; AVX512-NEXT: jb .LBB22_2 -; AVX512-NEXT: # %bb.1: -; AVX512-NEXT: movl (%rdx), %eax -; AVX512-NEXT: .LBB22_2: -; AVX512-NEXT: andq %rsi, 8(%rdi) -; AVX512-NEXT: andq %r8, (%rdi) -; AVX512-NEXT: # kill: def $eax killed $eax killed $rax -; AVX512-NEXT: retq +; AVX-LABEL: reset_multiload_i128: +; AVX: # %bb.0: +; AVX-NEXT: movl %esi, %ecx +; AVX-NEXT: movl $1, %esi +; AVX-NEXT: xorl %r8d, %r8d +; AVX-NEXT: shldq %cl, %rsi, %r8 +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: shlxq %rcx, %rsi, %r9 +; AVX-NEXT: testb $64, %cl +; AVX-NEXT: cmovneq %r9, %r8 +; AVX-NEXT: cmovneq %rax, %r9 +; AVX-NEXT: movq (%rdi), %r10 +; AVX-NEXT: movq 8(%rdi), %r11 +; AVX-NEXT: andnq %r11, %r8, %rcx +; AVX-NEXT: andq %r8, %r11 +; AVX-NEXT: andnq %r10, %r9, %rsi +; AVX-NEXT: andq %r9, %r10 +; AVX-NEXT: orq %r11, %r10 +; AVX-NEXT: jne .LBB22_2 +; AVX-NEXT: # %bb.1: +; AVX-NEXT: movl (%rdx), %eax +; AVX-NEXT: .LBB22_2: +; AVX-NEXT: movq %rsi, (%rdi) +; AVX-NEXT: movq %rcx, 8(%rdi) +; AVX-NEXT: # kill: def $eax killed $eax killed $rax +; AVX-NEXT: retq %rem = and i32 %position, 127 %ofs = zext nneg i32 %rem to i128 %bit = shl nuw i128 1, %ofs diff --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll index e73ff79..f270f8f 100644 --- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll +++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll @@ -7,7 +7,7 @@ target triple = "x86_64-unknown-unknown" declare void @bar1() define preserve_allcc void @foo()#0 { -; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $esp $fpcw $fpsw $fs $fs_base $gs $gs_base $hip $hsp $ip $mxcsr $rflags $rip $riz $rsp $sp $sph $spl $ss $ssp $_eflags $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $r11b $r11bh $r11d $r11w $r11wh $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $k0_k1 $k2_k3 $k4_k5 $k6_k7 $tmmcfg $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $tmm0_tmm1 $tmm2_tmm3 $tmm4_tmm5 $tmm6_tmm7 $r16 $r17 $r18 $r19 $r20 $r21 $r22 $r23 $r24 $r25 $r26 $r27 $r28 $r29 $r30 $r31 $r16b $r17b $r18b $r19b $r20b $r21b $r22b $r23b $r24b $r25b $r26b $r27b $r28b $r29b $r30b $r31b $r16bh $r17bh $r18bh $r19bh $r20bh $r21bh $r22bh $r23bh $r24bh $r25bh $r26bh $r27bh $r28bh $r29bh $r30bh $r31bh $r16d $r17d $r18d $r19d $r20d $r21d $r22d $r23d $r24d $r25d $r26d $r27d $r28d $r29d $r30d $r31d $r16w $r17w $r18w $r19w $r20w $r21w $r22w $r23w $r24w $r25w $r26w $r27w $r28w $r29w $r30w $r31w $r16wh $r17wh $r18wh $r19wh $r20wh $r21wh $r22wh $r23wh $r24wh $r25wh $r26wh $r27wh $r28wh $r29wh $r30wh $r31wh +; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $esp $fpcw $fpsw $fs $fs_base $gs $gs_base $hip $hsp $ip $mxcsr $rflags $rip $riz $rsp $sp $sph $spl $ss $ssp 
$_eflags $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $r11b $r11bh $r11d $r11w $r11wh $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $k0_k1 $k2_k3 $k4_k5 $k6_k7 $tmmcfg $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $r16 $r17 $r18 $r19 $r20 $r21 $r22 $r23 $r24 $r25 $r26 $r27 $r28 $r29 $r30 $r31 $r16b $r17b $r18b $r19b $r20b $r21b $r22b $r23b $r24b $r25b $r26b $r27b $r28b $r29b $r30b $r31b $r16bh $r17bh $r18bh $r19bh $r20bh $r21bh $r22bh $r23bh $r24bh $r25bh $r26bh $r27bh $r28bh $r29bh $r30bh $r31bh $r16d $r17d $r18d $r19d $r20d $r21d $r22d $r23d $r24d $r25d $r26d $r27d $r28d $r29d $r30d $r31d $r16w $r17w $r18w $r19w $r20w $r21w $r22w $r23w $r24w $r25w $r26w $r27w $r28w $r29w $r30w $r31w $r16wh $r17wh $r18wh $r19wh $r20wh $r21wh $r22wh $r23wh $r24wh $r25wh $r26wh $r27wh $r28wh $r29wh $r30wh $r31wh call void @bar1() call void @bar2() ret void @@ -15,7 +15,7 @@ define preserve_allcc void @foo()#0 { declare void @bar2() define preserve_nonecc void @foo2()#0 { -; CHECK: foo2 Clobbered Registers: $ah $al $ax $ch $cl $cs $cx $df $dh $di $dih $dil $dl $ds $dx $eax $ecx $edi $edx $eflags $eip $eiz $es $esi $esp $fpcw $fpsw $fs $fs_base $gs $gs_base $hax $hcx $hdi $hdx $hip $hsi $hsp $ip $mxcsr $rax $rcx $rdi $rdx $rflags $rip $riz $rsi $rsp $si $sih $sil $sp $sph $spl $ss $ssp $_eflags $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r8 $r9 $r10 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm0 $xmm1 $xmm2 $xmm3 $xmm4 $xmm5 $xmm6 $xmm7 $xmm8 $xmm9 $xmm10 $xmm11 $xmm12 $xmm13 $xmm14 $xmm15 $r8b $r9b $r10b $r11b $r8bh $r9bh $r10bh $r11bh $r8d $r9d $r10d $r11d $r8w $r9w $r10w $r11w $r8wh $r9wh $r10wh $r11wh $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $k0_k1 $k2_k3 $k4_k5 $k6_k7 $tmmcfg $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $tmm0_tmm1 $tmm2_tmm3 $tmm4_tmm5 $tmm6_tmm7 $r16 $r17 $r18 $r19 $r20 $r21 $r22 $r23 $r24 $r25 $r26 $r27 $r28 $r29 $r30 $r31 $r16b $r17b $r18b $r19b $r20b $r21b $r22b $r23b $r24b $r25b $r26b $r27b $r28b $r29b $r30b $r31b $r16bh $r17bh $r18bh $r19bh $r20bh $r21bh $r22bh $r23bh $r24bh $r25bh 
$r26bh $r27bh $r28bh $r29bh $r30bh $r31bh $r16d $r17d $r18d $r19d $r20d $r21d $r22d $r23d $r24d $r25d $r26d $r27d $r28d $r29d $r30d $r31d $r16w $r17w $r18w $r19w $r20w $r21w $r22w $r23w $r24w $r25w $r26w $r27w $r28w $r29w $r30w $r31w $r16wh $r17wh $r18wh $r19wh $r20wh $r21wh $r22wh $r23wh $r24wh $r25wh $r26wh $r27wh $r28wh $r29wh $r30wh $r31wh +; CHECK: foo2 Clobbered Registers: $ah $al $ax $ch $cl $cs $cx $df $dh $di $dih $dil $dl $ds $dx $eax $ecx $edi $edx $eflags $eip $eiz $es $esi $esp $fpcw $fpsw $fs $fs_base $gs $gs_base $hax $hcx $hdi $hdx $hip $hsi $hsp $ip $mxcsr $rax $rcx $rdi $rdx $rflags $rip $riz $rsi $rsp $si $sih $sil $sp $sph $spl $ss $ssp $_eflags $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r8 $r9 $r10 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm0 $xmm1 $xmm2 $xmm3 $xmm4 $xmm5 $xmm6 $xmm7 $xmm8 $xmm9 $xmm10 $xmm11 $xmm12 $xmm13 $xmm14 $xmm15 $r8b $r9b $r10b $r11b $r8bh $r9bh $r10bh $r11bh $r8d $r9d $r10d $r11d $r8w $r9w $r10w $r11w $r8wh $r9wh $r10wh $r11wh $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $k0_k1 $k2_k3 $k4_k5 $k6_k7 $tmmcfg $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $r16 $r17 $r18 $r19 $r20 $r21 $r22 $r23 $r24 $r25 $r26 $r27 $r28 $r29 $r30 $r31 $r16b $r17b $r18b $r19b $r20b $r21b $r22b $r23b $r24b $r25b $r26b $r27b $r28b $r29b $r30b $r31b $r16bh $r17bh $r18bh $r19bh $r20bh $r21bh $r22bh $r23bh $r24bh $r25bh $r26bh $r27bh $r28bh $r29bh $r30bh $r31bh $r16d $r17d $r18d $r19d $r20d $r21d $r22d $r23d $r24d $r25d $r26d $r27d $r28d $r29d $r30d $r31d $r16w $r17w $r18w $r19w $r20w $r21w $r22w $r23w $r24w $r25w $r26w $r27w $r28w $r29w $r30w $r31w $r16wh $r17wh $r18wh $r19wh $r20wh $r21wh $r22wh $r23wh $r24wh $r25wh $r26wh $r27wh $r28wh $r29wh $r30wh $r31wh call void @bar1() call void @bar2() ret void diff --git a/llvm/test/CodeGen/X86/pr165755.ll b/llvm/test/CodeGen/X86/pr165755.ll new file mode 100644 index 0000000..3ab484f --- /dev/null +++ b/llvm/test/CodeGen/X86/pr165755.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64 + +define i32 @PR165755(ptr %p0) { +; X86-LABEL: PR165755: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl (%ecx), %eax +; X86-NEXT: movb $0, (%ecx) +; X86-NEXT: retl +; +; X64-LABEL: PR165755: +; X64: # %bb.0: +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: movb $0, (%rdi) +; X64-NEXT: retq + %ld64 = load i64, ptr %p0, align 8 + store i8 0, ptr %p0, align 1 + %ld32 = load i32, ptr %p0, align 8 + %mask = and i32 %ld32, 32 + %zext = zext i32 %mask to i64 + %srl = lshr i64 %ld64, %zext + %res = trunc i64 %srl to i32 + ret i32 %res +} diff --git 
a/llvm/test/DebugInfo/Generic/objc-property.ll b/llvm/test/DebugInfo/Generic/objc-property.ll index 007d1fe..1ee7929 100644 --- a/llvm/test/DebugInfo/Generic/objc-property.ll +++ b/llvm/test/DebugInfo/Generic/objc-property.ll @@ -5,33 +5,33 @@ ; CHECK: DW_TAG_structure_type ; CHECK: DW_AT_name ("Foo") ; -; CHECK: DW_TAG_APPLE_property +; CHECK: 0x[[AUTO_SYNTH:[0-9a-f]+]]: DW_TAG_APPLE_property ; CHECK: DW_AT_APPLE_property_name ("autoSynthProp") ; CHECK: DW_AT_APPLE_property_attribute ; CHECK-SAME: DW_APPLE_PROPERTY_assign, DW_APPLE_PROPERTY_readwrite, ; CHECK-SAME: DW_APPLE_PROPERTY_atomic, DW_APPLE_PROPERTY_unsafe_unretained ; -; CHECK: DW_TAG_APPLE_property +; CHECK: 0x[[SYNTH:[0-9a-f]+]]: DW_TAG_APPLE_property ; CHECK: DW_AT_APPLE_property_name ("synthProp") ; CHECK: DW_AT_APPLE_property_attribute ; CHECK-SAME: DW_APPLE_PROPERTY_assign, DW_APPLE_PROPERTY_readwrite, ; CHECK-SAME: DW_APPLE_PROPERTY_atomic, DW_APPLE_PROPERTY_unsafe_unretained ; -; CHECK: DW_TAG_APPLE_property +; CHECK: 0x[[GET:[0-9a-f]+]]: DW_TAG_APPLE_property ; CHECK: DW_AT_APPLE_property_name ("customGetterProp") ; CHECK: DW_AT_APPLE_property_getter ("customGetter") ; CHECK: DW_AT_APPLE_property_attribute ; CHECK-SAME: DW_APPLE_PROPERTY_getter, DW_APPLE_PROPERTY_assign, DW_APPLE_PROPERTY_readwrite, ; CHECK-SAME: DW_APPLE_PROPERTY_atomic, DW_APPLE_PROPERTY_unsafe_unretained ; -; CHECK: DW_TAG_APPLE_property +; CHECK: 0x[[SET:[0-9a-f]+]]: DW_TAG_APPLE_property ; CHECK: DW_AT_APPLE_property_name ("customSetterProp") ; CHECK: DW_AT_APPLE_property_setter ("customSetter:") ; CHECK: DW_AT_APPLE_property_attribute ; CHECK-SAME: DW_APPLE_PROPERTY_assign, DW_APPLE_PROPERTY_readwrite, ; CHECK-SAME: DW_APPLE_PROPERTY_setter, DW_APPLE_PROPERTY_atomic, DW_APPLE_PROPERTY_unsafe_unretained ; -; CHECK: DW_TAG_APPLE_property +; CHECK: 0x[[ACCESSORS:[0-9a-f]+]]: DW_TAG_APPLE_property ; CHECK: DW_AT_APPLE_property_name ("customAccessorsProp") ; CHECK: DW_AT_APPLE_property_getter ("customGetter") ; CHECK: DW_AT_APPLE_property_setter ("customSetter:") @@ -39,15 +39,21 @@ ; CHECK-SAME: DW_APPLE_PROPERTY_getter, DW_APPLE_PROPERTY_assign, DW_APPLE_PROPERTY_readwrite, ; CHECK-SAME: DW_APPLE_PROPERTY_setter, DW_APPLE_PROPERTY_atomic, DW_APPLE_PROPERTY_unsafe_unretained ; -; FIXME: missing link between DW_TAG_member and the associated DW_TAG_APPLE_property ; CHECK: DW_TAG_member -; CHECK-NOT: DW_AT_APPLE_property +; CHECK: DW_AT_name ("someBackingIvar") +; CHECK: DW_AT_APPLE_property (0x[[SYNTH]] "synthProp") +; ; CHECK: DW_TAG_member -; CHECK-NOT: DW_AT_APPLE_property +; CHECK: DW_AT_name ("_autoSynthProp") +; CHECK: DW_AT_APPLE_property (0x[[AUTO_SYNTH]] "autoSynthProp") +; ; CHECK: DW_TAG_member -; CHECK-NOT: DW_AT_APPLE_property +; CHECK: DW_AT_name ("_customGetterProp") +; CHECK: DW_AT_APPLE_property (0x[[GET]] "customGetterProp") +; ; CHECK: DW_TAG_member -; CHECK-NOT: DW_AT_APPLE_property +; CHECK: DW_AT_name ("_customSetterProp") +; CHECK: DW_AT_APPLE_property (0x[[SET]] "customSetterProp") !llvm.module.flags = !{!0, !1} !llvm.dbg.cu = !{!2} diff --git a/llvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test b/llvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test new file mode 100755 index 0000000..aa3f6dc --- /dev/null +++ b/llvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test @@ -0,0 +1,13 @@ +; Test that the native PDB reader isn't crashed by index value bigger than +; number of types in TPI or IPI stream +; RUN: llvm-pdbutil dump %p/../Inputs/empty.pdb --type-index=20000000\ +; RUN: | FileCheck 
-check-prefixes=TYPES,NOT_FOUND %s +; RUN: llvm-pdbutil dump %p/../Inputs/empty.pdb --id-index=20000000\ +; RUN: | FileCheck -check-prefixes=IDS,NOT_FOUND %s + +TYPES: Types (TPI Stream) +IDS: Types (IPI Stream) +NOT_FOUND:============================================================ +NOT_FOUND: Showing 1 records. +NOT_FOUND: Type 0x1312D00 doesn't exist in TPI stream + diff --git a/llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll b/llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll new file mode 100644 index 0000000..1d11856 --- /dev/null +++ b/llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; Test basic type sanitizer instrumentation. +; +; RUN: opt -passes='tysan' -tysan-outline-instrumentation -S %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +;. +; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @tysan.module_ctor, ptr null }] +; CHECK: @__tysan_v1_Simple_20C_2b_2b_20TBAA = linkonce_odr constant { i64, i64, [16 x i8] } { i64 2, i64 0, [16 x i8] c"Simple C++ TBAA\00" }, comdat +; CHECK: @__tysan_v1_omnipotent_20char = linkonce_odr constant { i64, i64, ptr, i64, [16 x i8] } { i64 2, i64 1, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, i64 0, [16 x i8] c"omnipotent char\00" }, comdat +; CHECK: @__tysan_v1_int = linkonce_odr constant { i64, i64, ptr, i64, [4 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [4 x i8] c"int\00" }, comdat +; CHECK: @__tysan_v1_int_o_0 = linkonce_odr constant { i64, ptr, ptr, i64 } { i64 1, ptr @__tysan_v1_int, ptr @__tysan_v1_int, i64 0 }, comdat +; CHECK: @__tysan_shadow_memory_address = external global i64 +; CHECK: @__tysan_app_memory_mask = external global i64 +; CHECK: @__tysan_v1___ZTS1x = linkonce_odr constant { i64, i64, ptr, i64, ptr, i64, [7 x i8] } { i64 2, i64 2, ptr @__tysan_v1_int, i64 0, ptr @__tysan_v1_int, i64 4, [7 x i8] c"_ZTS1x\00" }, comdat +; CHECK: @__tysan_v1___ZTS1v = linkonce_odr constant { i64, i64, ptr, i64, ptr, i64, ptr, i64, [7 x i8] } { i64 2, i64 3, ptr @__tysan_v1_int, i64 8, ptr @__tysan_v1_int, i64 12, ptr @__tysan_v1___ZTS1x, i64 16, [7 x i8] c"_ZTS1v\00" }, comdat +; CHECK: @__tysan_v1___ZTS1v_o_12 = linkonce_odr constant { i64, ptr, ptr, i64 } { i64 1, ptr @__tysan_v1___ZTS1v, ptr @__tysan_v1_int, i64 12 }, comdat +; CHECK: @llvm.used = appending global [8 x ptr] [ptr @tysan.module_ctor, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr @__tysan_v1_int, ptr @__tysan_v1_int_o_0, ptr @__tysan_v1___ZTS1x, ptr @__tysan_v1___ZTS1v, ptr @__tysan_v1___ZTS1v_o_12], section "llvm.metadata" +;. 
+define i32 @test_load(ptr %a) sanitize_type { +; CHECK-LABEL: @test_load( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8 +; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8 +; CHECK-NEXT: call void @__tysan_instrument_with_shadow_update(ptr [[A:%.*]], ptr @__tysan_v1_int_o_0, i1 true, i64 4, i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %tmp1 = load i32, ptr %a, align 4, !tbaa !3 + ret i32 %tmp1 +} + +define void @test_store(ptr %a) sanitize_type { +; CHECK-LABEL: @test_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8 +; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8 +; CHECK-NEXT: call void @__tysan_instrument_with_shadow_update(ptr [[A:%.*]], ptr @__tysan_v1___ZTS1v_o_12, i1 true, i64 4, i32 2) +; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: ret void +; + +entry: + store i32 42, ptr %a, align 4, !tbaa !6 + ret void +} + +!0 = !{!"Simple C++ TBAA"} +!1 = !{!"omnipotent char", !0, i64 0} +!2 = !{!"int", !1, i64 0} +!3 = !{!2, !2, i64 0} +!4 = !{!"_ZTS1x", !2, i64 0, !2, i64 4} +!5 = !{!"_ZTS1v", !2, i64 8, !2, i64 12, !4, i64 16} +!6 = !{!5, !2, i64 12} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { sanitize_type } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind } +;. +; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C++ TBAA"} +; CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META1]], i64 12} +; CHECK: [[META5]] = !{!"_ZTS1v", [[META1]], i64 8, [[META1]], i64 12, [[META6:![0-9]+]], i64 16} +; CHECK: [[META6]] = !{!"_ZTS1x", [[META1]], i64 0, [[META1]], i64 4} +;. diff --git a/llvm/test/Instrumentation/TypeSanitizer/basic_verify_outlined.ll b/llvm/test/Instrumentation/TypeSanitizer/basic_verify_outlined.ll new file mode 100644 index 0000000..187a41e --- /dev/null +++ b/llvm/test/Instrumentation/TypeSanitizer/basic_verify_outlined.ll @@ -0,0 +1,736 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; Test basic type sanitizer instrumentation. +; +; RUN: opt -passes='tysan' -S -tysan-outline-instrumentation -tysan-verify-outlined-instrumentation -S %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +;. 
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @tysan.module_ctor, ptr null }] +; CHECK: @__tysan_v1_Simple_20C_2b_2b_20TBAA = linkonce_odr constant { i64, i64, [16 x i8] } { i64 2, i64 0, [16 x i8] c"Simple C++ TBAA\00" }, comdat +; CHECK: @__tysan_v1_omnipotent_20char = linkonce_odr constant { i64, i64, ptr, i64, [16 x i8] } { i64 2, i64 1, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, i64 0, [16 x i8] c"omnipotent char\00" }, comdat +; CHECK: @__tysan_v1_int = linkonce_odr constant { i64, i64, ptr, i64, [4 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [4 x i8] c"int\00" }, comdat +; CHECK: @__tysan_v1_int_o_0 = linkonce_odr constant { i64, ptr, ptr, i64 } { i64 1, ptr @__tysan_v1_int, ptr @__tysan_v1_int, i64 0 }, comdat +; CHECK: @__tysan_shadow_memory_address = external global i64 +; CHECK: @__tysan_app_memory_mask = external global i64 +; CHECK: @__tysan_v1___ZTS1x = linkonce_odr constant { i64, i64, ptr, i64, ptr, i64, [7 x i8] } { i64 2, i64 2, ptr @__tysan_v1_int, i64 0, ptr @__tysan_v1_int, i64 4, [7 x i8] c"_ZTS1x\00" }, comdat +; CHECK: @__tysan_v1___ZTS1v = linkonce_odr constant { i64, i64, ptr, i64, ptr, i64, ptr, i64, [7 x i8] } { i64 2, i64 3, ptr @__tysan_v1_int, i64 8, ptr @__tysan_v1_int, i64 12, ptr @__tysan_v1___ZTS1x, i64 16, [7 x i8] c"_ZTS1v\00" }, comdat +; CHECK: @__tysan_v1___ZTS1v_o_12 = linkonce_odr constant { i64, ptr, ptr, i64 } { i64 1, ptr @__tysan_v1___ZTS1v, ptr @__tysan_v1_int, i64 12 }, comdat +; CHECK: @llvm.used = appending global [8 x ptr] [ptr @tysan.module_ctor, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr @__tysan_v1_int, ptr @__tysan_v1_int_o_0, ptr @__tysan_v1___ZTS1x, ptr @__tysan_v1___ZTS1v, ptr @__tysan_v1___ZTS1v_o_12], section "llvm.metadata" +;. 
+define i32 @test_load(ptr %a) sanitize_type { +; CHECK-LABEL: @test_load( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[APP_MEM_MASK2:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8 +; CHECK-NEXT: [[SHADOW_BASE1:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8 +; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 ptrtoint (ptr @__tysan_app_memory_mask to i64), [[APP_MEM_MASK2]] +; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3 +; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE1]] +; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr +; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8 +; CHECK-NEXT: [[BAD_DESC:%.*]] = icmp ne ptr [[SHADOW_DESC]], null +; CHECK-NEXT: br i1 [[BAD_DESC]], label [[TMP0:%.*]], label [[TMP42:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 0: +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[SHADOW_DESC]], null +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne ptr [[TMP5]], null +; CHECK-NEXT: [[TMP7:%.*]] = or i1 false, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[SHADOW_PTR_INT]], 16 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne ptr [[TMP10]], null +; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP7]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[SHADOW_PTR_INT]], 24 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne ptr [[TMP15]], null +; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[SHADOW_PTR_INT]], 32 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne ptr [[TMP20]], null +; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP17]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[SHADOW_PTR_INT]], 40 +; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr +; CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne ptr [[TMP25]], null +; CHECK-NEXT: [[TMP27:%.*]] = or i1 [[TMP22]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[SHADOW_PTR_INT]], 48 +; CHECK-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP28]] to ptr +; CHECK-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8 +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne ptr [[TMP30]], null +; CHECK-NEXT: [[TMP32:%.*]] = or i1 [[TMP27]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[SHADOW_PTR_INT]], 56 +; CHECK-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP33]] to ptr +; CHECK-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP34]], align 8 +; CHECK-NEXT: [[TMP36:%.*]] = icmp ne ptr [[TMP35]], null +; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[TMP32]], [[TMP36]] +; CHECK-NEXT: br i1 [[TMP37]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF0]] +; CHECK: 38: +; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_app_memory_mask, i32 8, ptr null, i32 1) +; CHECK-NEXT: br label [[TMP39]] +; CHECK: 39: +; CHECK-NEXT: store ptr null, ptr [[SHADOW_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 +; CHECK-NEXT: 
[[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -1 to ptr), ptr [[SHADOW_BYTE_1_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_2_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 16 +; CHECK-NEXT: [[SHADOW_BYTE_2_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_2_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -2 to ptr), ptr [[SHADOW_BYTE_2_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24 +; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_4_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 32 +; CHECK-NEXT: [[SHADOW_BYTE_4_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_4_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -4 to ptr), ptr [[SHADOW_BYTE_4_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_5_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 40 +; CHECK-NEXT: [[SHADOW_BYTE_5_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_5_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -5 to ptr), ptr [[SHADOW_BYTE_5_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_6_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 48 +; CHECK-NEXT: [[SHADOW_BYTE_6_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_6_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -6 to ptr), ptr [[SHADOW_BYTE_6_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_7_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 56 +; CHECK-NEXT: [[SHADOW_BYTE_7_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_7_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -7 to ptr), ptr [[SHADOW_BYTE_7_PTR]], align 8 +; CHECK-NEXT: br label [[TMP41:%.*]] +; CHECK: 40: +; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_app_memory_mask, i32 8, ptr null, i32 1) +; CHECK-NEXT: br label [[TMP41]] +; CHECK: 41: +; CHECK-NEXT: br label [[TMP87:%.*]] +; CHECK: 42: +; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 +; CHECK-NEXT: [[TMP44:%.*]] = inttoptr i64 [[TMP43]] to ptr +; CHECK-NEXT: [[TMP45:%.*]] = load ptr, ptr [[TMP44]], align 8 +; CHECK-NEXT: [[TMP46:%.*]] = ptrtoint ptr [[TMP45]] to i64 +; CHECK-NEXT: [[TMP47:%.*]] = icmp sge i64 [[TMP46]], 0 +; CHECK-NEXT: [[TMP48:%.*]] = or i1 false, [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = add i64 [[SHADOW_PTR_INT]], 16 +; CHECK-NEXT: [[TMP50:%.*]] = inttoptr i64 [[TMP49]] to ptr +; CHECK-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8 +; CHECK-NEXT: [[TMP52:%.*]] = ptrtoint ptr [[TMP51]] to i64 +; CHECK-NEXT: [[TMP53:%.*]] = icmp sge i64 [[TMP52]], 0 +; CHECK-NEXT: [[TMP54:%.*]] = or i1 [[TMP48]], [[TMP53]] +; CHECK-NEXT: [[TMP55:%.*]] = add i64 [[SHADOW_PTR_INT]], 24 +; CHECK-NEXT: [[TMP56:%.*]] = inttoptr i64 [[TMP55]] to ptr +; CHECK-NEXT: [[TMP57:%.*]] = load ptr, ptr [[TMP56]], align 8 +; CHECK-NEXT: [[TMP58:%.*]] = ptrtoint ptr [[TMP57]] to i64 +; CHECK-NEXT: [[TMP59:%.*]] = icmp sge i64 [[TMP58]], 0 +; CHECK-NEXT: [[TMP60:%.*]] = or i1 [[TMP54]], [[TMP59]] +; CHECK-NEXT: [[TMP61:%.*]] = add i64 [[SHADOW_PTR_INT]], 32 +; CHECK-NEXT: [[TMP62:%.*]] = inttoptr i64 [[TMP61]] to ptr +; CHECK-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP62]], align 8 +; CHECK-NEXT: [[TMP64:%.*]] = ptrtoint ptr [[TMP63]] to i64 +; CHECK-NEXT: [[TMP65:%.*]] = icmp sge i64 [[TMP64]], 0 +; CHECK-NEXT: [[TMP66:%.*]] = or i1 [[TMP60]], [[TMP65]] +; CHECK-NEXT: [[TMP67:%.*]] = add i64 [[SHADOW_PTR_INT]], 40 +; CHECK-NEXT: [[TMP68:%.*]] = inttoptr i64 [[TMP67]] to ptr +; CHECK-NEXT: [[TMP69:%.*]] = load ptr, ptr [[TMP68]], align 8 +; 
CHECK-NEXT: [[TMP70:%.*]] = ptrtoint ptr [[TMP69]] to i64 +; CHECK-NEXT: [[TMP71:%.*]] = icmp sge i64 [[TMP70]], 0 +; CHECK-NEXT: [[TMP72:%.*]] = or i1 [[TMP66]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = add i64 [[SHADOW_PTR_INT]], 48 +; CHECK-NEXT: [[TMP74:%.*]] = inttoptr i64 [[TMP73]] to ptr +; CHECK-NEXT: [[TMP75:%.*]] = load ptr, ptr [[TMP74]], align 8 +; CHECK-NEXT: [[TMP76:%.*]] = ptrtoint ptr [[TMP75]] to i64 +; CHECK-NEXT: [[TMP77:%.*]] = icmp sge i64 [[TMP76]], 0 +; CHECK-NEXT: [[TMP78:%.*]] = or i1 [[TMP72]], [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = add i64 [[SHADOW_PTR_INT]], 56 +; CHECK-NEXT: [[TMP80:%.*]] = inttoptr i64 [[TMP79]] to ptr +; CHECK-NEXT: [[TMP81:%.*]] = load ptr, ptr [[TMP80]], align 8 +; CHECK-NEXT: [[TMP82:%.*]] = ptrtoint ptr [[TMP81]] to i64 +; CHECK-NEXT: [[TMP83:%.*]] = icmp sge i64 [[TMP82]], 0 +; CHECK-NEXT: [[TMP84:%.*]] = or i1 [[TMP78]], [[TMP83]] +; CHECK-NEXT: br i1 [[TMP84]], label [[TMP85:%.*]], label [[TMP86:%.*]], !prof [[PROF0]] +; CHECK: 85: +; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_app_memory_mask, i32 8, ptr null, i32 1) +; CHECK-NEXT: br label [[TMP86]] +; CHECK: 86: +; CHECK-NEXT: br label [[TMP87]] +; CHECK: 87: +; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8 +; CHECK-NEXT: [[APP_PTR_MASKED3:%.*]] = and i64 ptrtoint (ptr @__tysan_shadow_memory_address to i64), [[APP_MEM_MASK2]] +; CHECK-NEXT: [[APP_PTR_SHIFTED4:%.*]] = shl i64 [[APP_PTR_MASKED3]], 3 +; CHECK-NEXT: [[SHADOW_PTR_INT5:%.*]] = add i64 [[APP_PTR_SHIFTED4]], [[SHADOW_BASE1]] +; CHECK-NEXT: [[SHADOW_PTR6:%.*]] = inttoptr i64 [[SHADOW_PTR_INT5]] to ptr +; CHECK-NEXT: [[SHADOW_DESC7:%.*]] = load ptr, ptr [[SHADOW_PTR6]], align 8 +; CHECK-NEXT: [[BAD_DESC8:%.*]] = icmp ne ptr [[SHADOW_DESC7]], null +; CHECK-NEXT: br i1 [[BAD_DESC8]], label [[TMP88:%.*]], label [[TMP130:%.*]], !prof [[PROF0]] +; CHECK: 88: +; CHECK-NEXT: [[TMP89:%.*]] = icmp eq ptr [[SHADOW_DESC7]], null +; CHECK-NEXT: br i1 [[TMP89]], label [[TMP90:%.*]], label [[TMP128:%.*]] +; CHECK: 90: +; CHECK-NEXT: [[TMP91:%.*]] = add i64 [[SHADOW_PTR_INT5]], 8 +; CHECK-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP91]] to ptr +; CHECK-NEXT: [[TMP93:%.*]] = load ptr, ptr [[TMP92]], align 8 +; CHECK-NEXT: [[TMP94:%.*]] = icmp ne ptr [[TMP93]], null +; CHECK-NEXT: [[TMP95:%.*]] = or i1 false, [[TMP94]] +; CHECK-NEXT: [[TMP96:%.*]] = add i64 [[SHADOW_PTR_INT5]], 16 +; CHECK-NEXT: [[TMP97:%.*]] = inttoptr i64 [[TMP96]] to ptr +; CHECK-NEXT: [[TMP98:%.*]] = load ptr, ptr [[TMP97]], align 8 +; CHECK-NEXT: [[TMP99:%.*]] = icmp ne ptr [[TMP98]], null +; CHECK-NEXT: [[TMP100:%.*]] = or i1 [[TMP95]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = add i64 [[SHADOW_PTR_INT5]], 24 +; CHECK-NEXT: [[TMP102:%.*]] = inttoptr i64 [[TMP101]] to ptr +; CHECK-NEXT: [[TMP103:%.*]] = load ptr, ptr [[TMP102]], align 8 +; CHECK-NEXT: [[TMP104:%.*]] = icmp ne ptr [[TMP103]], null +; CHECK-NEXT: [[TMP105:%.*]] = or i1 [[TMP100]], [[TMP104]] +; CHECK-NEXT: [[TMP106:%.*]] = add i64 [[SHADOW_PTR_INT5]], 32 +; CHECK-NEXT: [[TMP107:%.*]] = inttoptr i64 [[TMP106]] to ptr +; CHECK-NEXT: [[TMP108:%.*]] = load ptr, ptr [[TMP107]], align 8 +; CHECK-NEXT: [[TMP109:%.*]] = icmp ne ptr [[TMP108]], null +; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[TMP105]], [[TMP109]] +; CHECK-NEXT: [[TMP111:%.*]] = add i64 [[SHADOW_PTR_INT5]], 40 +; CHECK-NEXT: [[TMP112:%.*]] = inttoptr i64 [[TMP111]] to ptr +; CHECK-NEXT: [[TMP113:%.*]] = load ptr, ptr [[TMP112]], align 8 +; CHECK-NEXT: [[TMP114:%.*]] = icmp ne ptr [[TMP113]], null +; CHECK-NEXT: 
[[TMP115:%.*]] = or i1 [[TMP110]], [[TMP114]] +; CHECK-NEXT: [[TMP116:%.*]] = add i64 [[SHADOW_PTR_INT5]], 48 +; CHECK-NEXT: [[TMP117:%.*]] = inttoptr i64 [[TMP116]] to ptr +; CHECK-NEXT: [[TMP118:%.*]] = load ptr, ptr [[TMP117]], align 8 +; CHECK-NEXT: [[TMP119:%.*]] = icmp ne ptr [[TMP118]], null +; CHECK-NEXT: [[TMP120:%.*]] = or i1 [[TMP115]], [[TMP119]] +; CHECK-NEXT: [[TMP121:%.*]] = add i64 [[SHADOW_PTR_INT5]], 56 +; CHECK-NEXT: [[TMP122:%.*]] = inttoptr i64 [[TMP121]] to ptr +; CHECK-NEXT: [[TMP123:%.*]] = load ptr, ptr [[TMP122]], align 8 +; CHECK-NEXT: [[TMP124:%.*]] = icmp ne ptr [[TMP123]], null +; CHECK-NEXT: [[TMP125:%.*]] = or i1 [[TMP120]], [[TMP124]] +; CHECK-NEXT: br i1 [[TMP125]], label [[TMP126:%.*]], label [[TMP127:%.*]], !prof [[PROF0]] +; CHECK: 126: +; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_shadow_memory_address, i32 8, ptr null, i32 1) +; CHECK-NEXT: br label [[TMP127]] +; CHECK: 127: +; CHECK-NEXT: store ptr null, ptr [[SHADOW_PTR6]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET9:%.*]] = add i64 [[SHADOW_PTR_INT5]], 8 +; CHECK-NEXT: [[SHADOW_BYTE_1_PTR10:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET9]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -1 to ptr), ptr [[SHADOW_BYTE_1_PTR10]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_2_OFFSET11:%.*]] = add i64 [[SHADOW_PTR_INT5]], 16 +; CHECK-NEXT: [[SHADOW_BYTE_2_PTR12:%.*]] = inttoptr i64 [[SHADOW_BYTE_2_OFFSET11]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -2 to ptr), ptr [[SHADOW_BYTE_2_PTR12]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET13:%.*]] = add i64 [[SHADOW_PTR_INT5]], 24 +; CHECK-NEXT: [[SHADOW_BYTE_3_PTR14:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET13]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR14]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_4_OFFSET15:%.*]] = add i64 [[SHADOW_PTR_INT5]], 32 +; CHECK-NEXT: [[SHADOW_BYTE_4_PTR16:%.*]] = inttoptr i64 [[SHADOW_BYTE_4_OFFSET15]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -4 to ptr), ptr [[SHADOW_BYTE_4_PTR16]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_5_OFFSET17:%.*]] = add i64 [[SHADOW_PTR_INT5]], 40 +; CHECK-NEXT: [[SHADOW_BYTE_5_PTR18:%.*]] = inttoptr i64 [[SHADOW_BYTE_5_OFFSET17]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -5 to ptr), ptr [[SHADOW_BYTE_5_PTR18]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_6_OFFSET19:%.*]] = add i64 [[SHADOW_PTR_INT5]], 48 +; CHECK-NEXT: [[SHADOW_BYTE_6_PTR20:%.*]] = inttoptr i64 [[SHADOW_BYTE_6_OFFSET19]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -6 to ptr), ptr [[SHADOW_BYTE_6_PTR20]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_7_OFFSET21:%.*]] = add i64 [[SHADOW_PTR_INT5]], 56 +; CHECK-NEXT: [[SHADOW_BYTE_7_PTR22:%.*]] = inttoptr i64 [[SHADOW_BYTE_7_OFFSET21]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -7 to ptr), ptr [[SHADOW_BYTE_7_PTR22]], align 8 +; CHECK-NEXT: br label [[TMP129:%.*]] +; CHECK: 128: +; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_shadow_memory_address, i32 8, ptr null, i32 1) +; CHECK-NEXT: br label [[TMP129]] +; CHECK: 129: +; CHECK-NEXT: br label [[TMP175:%.*]] +; CHECK: 130: +; CHECK-NEXT: [[TMP131:%.*]] = add i64 [[SHADOW_PTR_INT5]], 8 +; CHECK-NEXT: [[TMP132:%.*]] = inttoptr i64 [[TMP131]] to ptr +; CHECK-NEXT: [[TMP133:%.*]] = load ptr, ptr [[TMP132]], align 8 +; CHECK-NEXT: [[TMP134:%.*]] = ptrtoint ptr [[TMP133]] to i64 +; CHECK-NEXT: [[TMP135:%.*]] = icmp sge i64 [[TMP134]], 0 +; CHECK-NEXT: [[TMP136:%.*]] = or i1 false, [[TMP135]] +; CHECK-NEXT: [[TMP137:%.*]] = add i64 [[SHADOW_PTR_INT5]], 16 +; CHECK-NEXT: [[TMP138:%.*]] = inttoptr i64 
[[TMP137]] to ptr +; CHECK-NEXT: [[TMP139:%.*]] = load ptr, ptr [[TMP138]], align 8 +; CHECK-NEXT: [[TMP140:%.*]] = ptrtoint ptr [[TMP139]] to i64 +; CHECK-NEXT: [[TMP141:%.*]] = icmp sge i64 [[TMP140]], 0 +; CHECK-NEXT: [[TMP142:%.*]] = or i1 [[TMP136]], [[TMP141]] +; CHECK-NEXT: [[TMP143:%.*]] = add i64 [[SHADOW_PTR_INT5]], 24 +; CHECK-NEXT: [[TMP144:%.*]] = inttoptr i64 [[TMP143]] to ptr +; CHECK-NEXT: [[TMP145:%.*]] = load ptr, ptr [[TMP144]], align 8 +; CHECK-NEXT: [[TMP146:%.*]] = ptrtoint ptr [[TMP145]] to i64 +; CHECK-NEXT: [[TMP147:%.*]] = icmp sge i64 [[TMP146]], 0 +; CHECK-NEXT: [[TMP148:%.*]] = or i1 [[TMP142]], [[TMP147]] +; CHECK-NEXT: [[TMP149:%.*]] = add i64 [[SHADOW_PTR_INT5]], 32 +; CHECK-NEXT: [[TMP150:%.*]] = inttoptr i64 [[TMP149]] to ptr +; CHECK-NEXT: [[TMP151:%.*]] = load ptr, ptr [[TMP150]], align 8 +; CHECK-NEXT: [[TMP152:%.*]] = ptrtoint ptr [[TMP151]] to i64 +; CHECK-NEXT: [[TMP153:%.*]] = icmp sge i64 [[TMP152]], 0 +; CHECK-NEXT: [[TMP154:%.*]] = or i1 [[TMP148]], [[TMP153]] +; CHECK-NEXT: [[TMP155:%.*]] = add i64 [[SHADOW_PTR_INT5]], 40 +; CHECK-NEXT: [[TMP156:%.*]] = inttoptr i64 [[TMP155]] to ptr +; CHECK-NEXT: [[TMP157:%.*]] = load ptr, ptr [[TMP156]], align 8 +; CHECK-NEXT: [[TMP158:%.*]] = ptrtoint ptr [[TMP157]] to i64 +; CHECK-NEXT: [[TMP159:%.*]] = icmp sge i64 [[TMP158]], 0 +; CHECK-NEXT: [[TMP160:%.*]] = or i1 [[TMP154]], [[TMP159]] +; CHECK-NEXT: [[TMP161:%.*]] = add i64 [[SHADOW_PTR_INT5]], 48 +; CHECK-NEXT: [[TMP162:%.*]] = inttoptr i64 [[TMP161]] to ptr +; CHECK-NEXT: [[TMP163:%.*]] = load ptr, ptr [[TMP162]], align 8 +; CHECK-NEXT: [[TMP164:%.*]] = ptrtoint ptr [[TMP163]] to i64 +; CHECK-NEXT: [[TMP165:%.*]] = icmp sge i64 [[TMP164]], 0 +; CHECK-NEXT: [[TMP166:%.*]] = or i1 [[TMP160]], [[TMP165]] +; CHECK-NEXT: [[TMP167:%.*]] = add i64 [[SHADOW_PTR_INT5]], 56 +; CHECK-NEXT: [[TMP168:%.*]] = inttoptr i64 [[TMP167]] to ptr +; CHECK-NEXT: [[TMP169:%.*]] = load ptr, ptr [[TMP168]], align 8 +; CHECK-NEXT: [[TMP170:%.*]] = ptrtoint ptr [[TMP169]] to i64 +; CHECK-NEXT: [[TMP171:%.*]] = icmp sge i64 [[TMP170]], 0 +; CHECK-NEXT: [[TMP172:%.*]] = or i1 [[TMP166]], [[TMP171]] +; CHECK-NEXT: br i1 [[TMP172]], label [[TMP173:%.*]], label [[TMP174:%.*]], !prof [[PROF0]] +; CHECK: 173: +; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_shadow_memory_address, i32 8, ptr null, i32 1) +; CHECK-NEXT: br label [[TMP174]] +; CHECK: 174: +; CHECK-NEXT: br label [[TMP175]] +; CHECK: 175: +; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8 +; CHECK-NEXT: call void @__tysan_instrument_with_shadow_update(ptr [[A:%.*]], ptr @__tysan_v1_int_o_0, i1 true, i64 4, i32 1) +; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[APP_PTR_MASKED23:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK2]] +; CHECK-NEXT: [[APP_PTR_SHIFTED24:%.*]] = shl i64 [[APP_PTR_MASKED23]], 3 +; CHECK-NEXT: [[SHADOW_PTR_INT25:%.*]] = add i64 [[APP_PTR_SHIFTED24]], [[SHADOW_BASE1]] +; CHECK-NEXT: [[SHADOW_PTR26:%.*]] = inttoptr i64 [[SHADOW_PTR_INT25]] to ptr +; CHECK-NEXT: [[SHADOW_DESC27:%.*]] = load ptr, ptr [[SHADOW_PTR26]], align 8 +; CHECK-NEXT: [[BAD_DESC28:%.*]] = icmp ne ptr [[SHADOW_DESC27]], @__tysan_v1_int_o_0 +; CHECK-NEXT: br i1 [[BAD_DESC28]], label [[TMP176:%.*]], label [[TMP198:%.*]], !prof [[PROF0]] +; CHECK: 176: +; CHECK-NEXT: [[TMP177:%.*]] = icmp eq ptr [[SHADOW_DESC27]], null +; CHECK-NEXT: br i1 [[TMP177]], label [[TMP178:%.*]], label [[TMP196:%.*]] +; CHECK: 178: +; CHECK-NEXT: [[TMP179:%.*]] = add i64 
[[SHADOW_PTR_INT25]], 8 +; CHECK-NEXT: [[TMP180:%.*]] = inttoptr i64 [[TMP179]] to ptr +; CHECK-NEXT: [[TMP181:%.*]] = load ptr, ptr [[TMP180]], align 8 +; CHECK-NEXT: [[TMP182:%.*]] = icmp ne ptr [[TMP181]], null +; CHECK-NEXT: [[TMP183:%.*]] = or i1 false, [[TMP182]] +; CHECK-NEXT: [[TMP184:%.*]] = add i64 [[SHADOW_PTR_INT25]], 16 +; CHECK-NEXT: [[TMP185:%.*]] = inttoptr i64 [[TMP184]] to ptr +; CHECK-NEXT: [[TMP186:%.*]] = load ptr, ptr [[TMP185]], align 8 +; CHECK-NEXT: [[TMP187:%.*]] = icmp ne ptr [[TMP186]], null +; CHECK-NEXT: [[TMP188:%.*]] = or i1 [[TMP183]], [[TMP187]] +; CHECK-NEXT: [[TMP189:%.*]] = add i64 [[SHADOW_PTR_INT25]], 24 +; CHECK-NEXT: [[TMP190:%.*]] = inttoptr i64 [[TMP189]] to ptr +; CHECK-NEXT: [[TMP191:%.*]] = load ptr, ptr [[TMP190]], align 8 +; CHECK-NEXT: [[TMP192:%.*]] = icmp ne ptr [[TMP191]], null +; CHECK-NEXT: [[TMP193:%.*]] = or i1 [[TMP188]], [[TMP192]] +; CHECK-NEXT: br i1 [[TMP193]], label [[TMP194:%.*]], label [[TMP195:%.*]], !prof [[PROF0]] +; CHECK: 194: +; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_int_o_0, i32 1) +; CHECK-NEXT: br label [[TMP195]] +; CHECK: 195: +; CHECK-NEXT: store ptr @__tysan_v1_int_o_0, ptr [[SHADOW_PTR26]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET29:%.*]] = add i64 [[SHADOW_PTR_INT25]], 8 +; CHECK-NEXT: [[SHADOW_BYTE_1_PTR30:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET29]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -1 to ptr), ptr [[SHADOW_BYTE_1_PTR30]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_2_OFFSET31:%.*]] = add i64 [[SHADOW_PTR_INT25]], 16 +; CHECK-NEXT: [[SHADOW_BYTE_2_PTR32:%.*]] = inttoptr i64 [[SHADOW_BYTE_2_OFFSET31]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -2 to ptr), ptr [[SHADOW_BYTE_2_PTR32]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET33:%.*]] = add i64 [[SHADOW_PTR_INT25]], 24 +; CHECK-NEXT: [[SHADOW_BYTE_3_PTR34:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET33]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR34]], align 8 +; CHECK-NEXT: br label [[TMP197:%.*]] +; CHECK: 196: +; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_int_o_0, i32 1) +; CHECK-NEXT: br label [[TMP197]] +; CHECK: 197: +; CHECK-NEXT: br label [[TMP219:%.*]] +; CHECK: 198: +; CHECK-NEXT: [[TMP199:%.*]] = add i64 [[SHADOW_PTR_INT25]], 8 +; CHECK-NEXT: [[TMP200:%.*]] = inttoptr i64 [[TMP199]] to ptr +; CHECK-NEXT: [[TMP201:%.*]] = load ptr, ptr [[TMP200]], align 8 +; CHECK-NEXT: [[TMP202:%.*]] = ptrtoint ptr [[TMP201]] to i64 +; CHECK-NEXT: [[TMP203:%.*]] = icmp sge i64 [[TMP202]], 0 +; CHECK-NEXT: [[TMP204:%.*]] = or i1 false, [[TMP203]] +; CHECK-NEXT: [[TMP205:%.*]] = add i64 [[SHADOW_PTR_INT25]], 16 +; CHECK-NEXT: [[TMP206:%.*]] = inttoptr i64 [[TMP205]] to ptr +; CHECK-NEXT: [[TMP207:%.*]] = load ptr, ptr [[TMP206]], align 8 +; CHECK-NEXT: [[TMP208:%.*]] = ptrtoint ptr [[TMP207]] to i64 +; CHECK-NEXT: [[TMP209:%.*]] = icmp sge i64 [[TMP208]], 0 +; CHECK-NEXT: [[TMP210:%.*]] = or i1 [[TMP204]], [[TMP209]] +; CHECK-NEXT: [[TMP211:%.*]] = add i64 [[SHADOW_PTR_INT25]], 24 +; CHECK-NEXT: [[TMP212:%.*]] = inttoptr i64 [[TMP211]] to ptr +; CHECK-NEXT: [[TMP213:%.*]] = load ptr, ptr [[TMP212]], align 8 +; CHECK-NEXT: [[TMP214:%.*]] = ptrtoint ptr [[TMP213]] to i64 +; CHECK-NEXT: [[TMP215:%.*]] = icmp sge i64 [[TMP214]], 0 +; CHECK-NEXT: [[TMP216:%.*]] = or i1 [[TMP210]], [[TMP215]] +; CHECK-NEXT: br i1 [[TMP216]], label [[TMP217:%.*]], label [[TMP218:%.*]], !prof [[PROF0]] +; CHECK: 217: +; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr 
@__tysan_v1_int_o_0, i32 1) +; CHECK-NEXT: br label [[TMP218]] +; CHECK: 218: +; CHECK-NEXT: br label [[TMP219]] +; CHECK: 219: +; CHECK-NEXT: [[WAA:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[TBAA1:![0-9]+]] +; CHECK-NEXT: ret i32 [[WAA]] +; +entry: + %WAA = load i32, ptr %a, align 4, !tbaa !3 + ret i32 %WAA +} + +define void @test_store(ptr %a) sanitize_type { +; CHECK-LABEL: @test_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[APP_MEM_MASK2:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8 +; CHECK-NEXT: [[SHADOW_BASE1:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8 +; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 ptrtoint (ptr @__tysan_app_memory_mask to i64), [[APP_MEM_MASK2]] +; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3 +; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE1]] +; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr +; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8 +; CHECK-NEXT: [[BAD_DESC:%.*]] = icmp ne ptr [[SHADOW_DESC]], null +; CHECK-NEXT: br i1 [[BAD_DESC]], label [[TMP0:%.*]], label [[TMP42:%.*]], !prof [[PROF0]] +; CHECK: 0: +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[SHADOW_DESC]], null +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne ptr [[TMP5]], null +; CHECK-NEXT: [[TMP7:%.*]] = or i1 false, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[SHADOW_PTR_INT]], 16 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne ptr [[TMP10]], null +; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP7]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[SHADOW_PTR_INT]], 24 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne ptr [[TMP15]], null +; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[SHADOW_PTR_INT]], 32 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne ptr [[TMP20]], null +; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP17]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[SHADOW_PTR_INT]], 40 +; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr +; CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne ptr [[TMP25]], null +; CHECK-NEXT: [[TMP27:%.*]] = or i1 [[TMP22]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[SHADOW_PTR_INT]], 48 +; CHECK-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP28]] to ptr +; CHECK-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8 +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne ptr [[TMP30]], null +; CHECK-NEXT: [[TMP32:%.*]] = or i1 [[TMP27]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[SHADOW_PTR_INT]], 56 +; CHECK-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP33]] to ptr +; CHECK-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP34]], align 8 +; CHECK-NEXT: [[TMP36:%.*]] = icmp ne ptr [[TMP35]], null +; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[TMP32]], [[TMP36]] +; CHECK-NEXT: br i1 [[TMP37]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF0]] +; CHECK: 38: +; 
CHECK-NEXT: call void @__tysan_check(ptr @__tysan_app_memory_mask, i32 8, ptr null, i32 1) +; CHECK-NEXT: br label [[TMP39]] +; CHECK: 39: +; CHECK-NEXT: store ptr null, ptr [[SHADOW_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 +; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -1 to ptr), ptr [[SHADOW_BYTE_1_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_2_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 16 +; CHECK-NEXT: [[SHADOW_BYTE_2_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_2_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -2 to ptr), ptr [[SHADOW_BYTE_2_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24 +; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_4_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 32 +; CHECK-NEXT: [[SHADOW_BYTE_4_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_4_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -4 to ptr), ptr [[SHADOW_BYTE_4_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_5_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 40 +; CHECK-NEXT: [[SHADOW_BYTE_5_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_5_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -5 to ptr), ptr [[SHADOW_BYTE_5_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_6_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 48 +; CHECK-NEXT: [[SHADOW_BYTE_6_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_6_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -6 to ptr), ptr [[SHADOW_BYTE_6_PTR]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_7_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 56 +; CHECK-NEXT: [[SHADOW_BYTE_7_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_7_OFFSET]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -7 to ptr), ptr [[SHADOW_BYTE_7_PTR]], align 8 +; CHECK-NEXT: br label [[TMP41:%.*]] +; CHECK: 40: +; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_app_memory_mask, i32 8, ptr null, i32 1) +; CHECK-NEXT: br label [[TMP41]] +; CHECK: 41: +; CHECK-NEXT: br label [[TMP87:%.*]] +; CHECK: 42: +; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 +; CHECK-NEXT: [[TMP44:%.*]] = inttoptr i64 [[TMP43]] to ptr +; CHECK-NEXT: [[TMP45:%.*]] = load ptr, ptr [[TMP44]], align 8 +; CHECK-NEXT: [[TMP46:%.*]] = ptrtoint ptr [[TMP45]] to i64 +; CHECK-NEXT: [[TMP47:%.*]] = icmp sge i64 [[TMP46]], 0 +; CHECK-NEXT: [[TMP48:%.*]] = or i1 false, [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = add i64 [[SHADOW_PTR_INT]], 16 +; CHECK-NEXT: [[TMP50:%.*]] = inttoptr i64 [[TMP49]] to ptr +; CHECK-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8 +; CHECK-NEXT: [[TMP52:%.*]] = ptrtoint ptr [[TMP51]] to i64 +; CHECK-NEXT: [[TMP53:%.*]] = icmp sge i64 [[TMP52]], 0 +; CHECK-NEXT: [[TMP54:%.*]] = or i1 [[TMP48]], [[TMP53]] +; CHECK-NEXT: [[TMP55:%.*]] = add i64 [[SHADOW_PTR_INT]], 24 +; CHECK-NEXT: [[TMP56:%.*]] = inttoptr i64 [[TMP55]] to ptr +; CHECK-NEXT: [[TMP57:%.*]] = load ptr, ptr [[TMP56]], align 8 +; CHECK-NEXT: [[TMP58:%.*]] = ptrtoint ptr [[TMP57]] to i64 +; CHECK-NEXT: [[TMP59:%.*]] = icmp sge i64 [[TMP58]], 0 +; CHECK-NEXT: [[TMP60:%.*]] = or i1 [[TMP54]], [[TMP59]] +; CHECK-NEXT: [[TMP61:%.*]] = add i64 [[SHADOW_PTR_INT]], 32 +; CHECK-NEXT: [[TMP62:%.*]] = inttoptr i64 [[TMP61]] to ptr +; CHECK-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP62]], align 8 +; CHECK-NEXT: [[TMP64:%.*]] = ptrtoint ptr [[TMP63]] to i64 +; CHECK-NEXT: 
[[TMP65:%.*]] = icmp sge i64 [[TMP64]], 0 +; CHECK-NEXT: [[TMP66:%.*]] = or i1 [[TMP60]], [[TMP65]] +; CHECK-NEXT: [[TMP67:%.*]] = add i64 [[SHADOW_PTR_INT]], 40 +; CHECK-NEXT: [[TMP68:%.*]] = inttoptr i64 [[TMP67]] to ptr +; CHECK-NEXT: [[TMP69:%.*]] = load ptr, ptr [[TMP68]], align 8 +; CHECK-NEXT: [[TMP70:%.*]] = ptrtoint ptr [[TMP69]] to i64 +; CHECK-NEXT: [[TMP71:%.*]] = icmp sge i64 [[TMP70]], 0 +; CHECK-NEXT: [[TMP72:%.*]] = or i1 [[TMP66]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = add i64 [[SHADOW_PTR_INT]], 48 +; CHECK-NEXT: [[TMP74:%.*]] = inttoptr i64 [[TMP73]] to ptr +; CHECK-NEXT: [[TMP75:%.*]] = load ptr, ptr [[TMP74]], align 8 +; CHECK-NEXT: [[TMP76:%.*]] = ptrtoint ptr [[TMP75]] to i64 +; CHECK-NEXT: [[TMP77:%.*]] = icmp sge i64 [[TMP76]], 0 +; CHECK-NEXT: [[TMP78:%.*]] = or i1 [[TMP72]], [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = add i64 [[SHADOW_PTR_INT]], 56 +; CHECK-NEXT: [[TMP80:%.*]] = inttoptr i64 [[TMP79]] to ptr +; CHECK-NEXT: [[TMP81:%.*]] = load ptr, ptr [[TMP80]], align 8 +; CHECK-NEXT: [[TMP82:%.*]] = ptrtoint ptr [[TMP81]] to i64 +; CHECK-NEXT: [[TMP83:%.*]] = icmp sge i64 [[TMP82]], 0 +; CHECK-NEXT: [[TMP84:%.*]] = or i1 [[TMP78]], [[TMP83]] +; CHECK-NEXT: br i1 [[TMP84]], label [[TMP85:%.*]], label [[TMP86:%.*]], !prof [[PROF0]] +; CHECK: 85: +; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_app_memory_mask, i32 8, ptr null, i32 1) +; CHECK-NEXT: br label [[TMP86]] +; CHECK: 86: +; CHECK-NEXT: br label [[TMP87]] +; CHECK: 87: +; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8 +; CHECK-NEXT: [[APP_PTR_MASKED3:%.*]] = and i64 ptrtoint (ptr @__tysan_shadow_memory_address to i64), [[APP_MEM_MASK2]] +; CHECK-NEXT: [[APP_PTR_SHIFTED4:%.*]] = shl i64 [[APP_PTR_MASKED3]], 3 +; CHECK-NEXT: [[SHADOW_PTR_INT5:%.*]] = add i64 [[APP_PTR_SHIFTED4]], [[SHADOW_BASE1]] +; CHECK-NEXT: [[SHADOW_PTR6:%.*]] = inttoptr i64 [[SHADOW_PTR_INT5]] to ptr +; CHECK-NEXT: [[SHADOW_DESC7:%.*]] = load ptr, ptr [[SHADOW_PTR6]], align 8 +; CHECK-NEXT: [[BAD_DESC8:%.*]] = icmp ne ptr [[SHADOW_DESC7]], null +; CHECK-NEXT: br i1 [[BAD_DESC8]], label [[TMP88:%.*]], label [[TMP130:%.*]], !prof [[PROF0]] +; CHECK: 88: +; CHECK-NEXT: [[TMP89:%.*]] = icmp eq ptr [[SHADOW_DESC7]], null +; CHECK-NEXT: br i1 [[TMP89]], label [[TMP90:%.*]], label [[TMP128:%.*]] +; CHECK: 90: +; CHECK-NEXT: [[TMP91:%.*]] = add i64 [[SHADOW_PTR_INT5]], 8 +; CHECK-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP91]] to ptr +; CHECK-NEXT: [[TMP93:%.*]] = load ptr, ptr [[TMP92]], align 8 +; CHECK-NEXT: [[TMP94:%.*]] = icmp ne ptr [[TMP93]], null +; CHECK-NEXT: [[TMP95:%.*]] = or i1 false, [[TMP94]] +; CHECK-NEXT: [[TMP96:%.*]] = add i64 [[SHADOW_PTR_INT5]], 16 +; CHECK-NEXT: [[TMP97:%.*]] = inttoptr i64 [[TMP96]] to ptr +; CHECK-NEXT: [[TMP98:%.*]] = load ptr, ptr [[TMP97]], align 8 +; CHECK-NEXT: [[TMP99:%.*]] = icmp ne ptr [[TMP98]], null +; CHECK-NEXT: [[TMP100:%.*]] = or i1 [[TMP95]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = add i64 [[SHADOW_PTR_INT5]], 24 +; CHECK-NEXT: [[TMP102:%.*]] = inttoptr i64 [[TMP101]] to ptr +; CHECK-NEXT: [[TMP103:%.*]] = load ptr, ptr [[TMP102]], align 8 +; CHECK-NEXT: [[TMP104:%.*]] = icmp ne ptr [[TMP103]], null +; CHECK-NEXT: [[TMP105:%.*]] = or i1 [[TMP100]], [[TMP104]] +; CHECK-NEXT: [[TMP106:%.*]] = add i64 [[SHADOW_PTR_INT5]], 32 +; CHECK-NEXT: [[TMP107:%.*]] = inttoptr i64 [[TMP106]] to ptr +; CHECK-NEXT: [[TMP108:%.*]] = load ptr, ptr [[TMP107]], align 8 +; CHECK-NEXT: [[TMP109:%.*]] = icmp ne ptr [[TMP108]], null +; CHECK-NEXT: [[TMP110:%.*]] = or i1 
[[TMP105]], [[TMP109]] +; CHECK-NEXT: [[TMP111:%.*]] = add i64 [[SHADOW_PTR_INT5]], 40 +; CHECK-NEXT: [[TMP112:%.*]] = inttoptr i64 [[TMP111]] to ptr +; CHECK-NEXT: [[TMP113:%.*]] = load ptr, ptr [[TMP112]], align 8 +; CHECK-NEXT: [[TMP114:%.*]] = icmp ne ptr [[TMP113]], null +; CHECK-NEXT: [[TMP115:%.*]] = or i1 [[TMP110]], [[TMP114]] +; CHECK-NEXT: [[TMP116:%.*]] = add i64 [[SHADOW_PTR_INT5]], 48 +; CHECK-NEXT: [[TMP117:%.*]] = inttoptr i64 [[TMP116]] to ptr +; CHECK-NEXT: [[TMP118:%.*]] = load ptr, ptr [[TMP117]], align 8 +; CHECK-NEXT: [[TMP119:%.*]] = icmp ne ptr [[TMP118]], null +; CHECK-NEXT: [[TMP120:%.*]] = or i1 [[TMP115]], [[TMP119]] +; CHECK-NEXT: [[TMP121:%.*]] = add i64 [[SHADOW_PTR_INT5]], 56 +; CHECK-NEXT: [[TMP122:%.*]] = inttoptr i64 [[TMP121]] to ptr +; CHECK-NEXT: [[TMP123:%.*]] = load ptr, ptr [[TMP122]], align 8 +; CHECK-NEXT: [[TMP124:%.*]] = icmp ne ptr [[TMP123]], null +; CHECK-NEXT: [[TMP125:%.*]] = or i1 [[TMP120]], [[TMP124]] +; CHECK-NEXT: br i1 [[TMP125]], label [[TMP126:%.*]], label [[TMP127:%.*]], !prof [[PROF0]] +; CHECK: 126: +; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_shadow_memory_address, i32 8, ptr null, i32 1) +; CHECK-NEXT: br label [[TMP127]] +; CHECK: 127: +; CHECK-NEXT: store ptr null, ptr [[SHADOW_PTR6]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET9:%.*]] = add i64 [[SHADOW_PTR_INT5]], 8 +; CHECK-NEXT: [[SHADOW_BYTE_1_PTR10:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET9]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -1 to ptr), ptr [[SHADOW_BYTE_1_PTR10]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_2_OFFSET11:%.*]] = add i64 [[SHADOW_PTR_INT5]], 16 +; CHECK-NEXT: [[SHADOW_BYTE_2_PTR12:%.*]] = inttoptr i64 [[SHADOW_BYTE_2_OFFSET11]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -2 to ptr), ptr [[SHADOW_BYTE_2_PTR12]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET13:%.*]] = add i64 [[SHADOW_PTR_INT5]], 24 +; CHECK-NEXT: [[SHADOW_BYTE_3_PTR14:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET13]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR14]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_4_OFFSET15:%.*]] = add i64 [[SHADOW_PTR_INT5]], 32 +; CHECK-NEXT: [[SHADOW_BYTE_4_PTR16:%.*]] = inttoptr i64 [[SHADOW_BYTE_4_OFFSET15]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -4 to ptr), ptr [[SHADOW_BYTE_4_PTR16]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_5_OFFSET17:%.*]] = add i64 [[SHADOW_PTR_INT5]], 40 +; CHECK-NEXT: [[SHADOW_BYTE_5_PTR18:%.*]] = inttoptr i64 [[SHADOW_BYTE_5_OFFSET17]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -5 to ptr), ptr [[SHADOW_BYTE_5_PTR18]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_6_OFFSET19:%.*]] = add i64 [[SHADOW_PTR_INT5]], 48 +; CHECK-NEXT: [[SHADOW_BYTE_6_PTR20:%.*]] = inttoptr i64 [[SHADOW_BYTE_6_OFFSET19]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -6 to ptr), ptr [[SHADOW_BYTE_6_PTR20]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_7_OFFSET21:%.*]] = add i64 [[SHADOW_PTR_INT5]], 56 +; CHECK-NEXT: [[SHADOW_BYTE_7_PTR22:%.*]] = inttoptr i64 [[SHADOW_BYTE_7_OFFSET21]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -7 to ptr), ptr [[SHADOW_BYTE_7_PTR22]], align 8 +; CHECK-NEXT: br label [[TMP129:%.*]] +; CHECK: 128: +; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_shadow_memory_address, i32 8, ptr null, i32 1) +; CHECK-NEXT: br label [[TMP129]] +; CHECK: 129: +; CHECK-NEXT: br label [[TMP175:%.*]] +; CHECK: 130: +; CHECK-NEXT: [[TMP131:%.*]] = add i64 [[SHADOW_PTR_INT5]], 8 +; CHECK-NEXT: [[TMP132:%.*]] = inttoptr i64 [[TMP131]] to ptr +; CHECK-NEXT: [[TMP133:%.*]] = load ptr, ptr [[TMP132]], align 8 
+; CHECK-NEXT: [[TMP134:%.*]] = ptrtoint ptr [[TMP133]] to i64 +; CHECK-NEXT: [[TMP135:%.*]] = icmp sge i64 [[TMP134]], 0 +; CHECK-NEXT: [[TMP136:%.*]] = or i1 false, [[TMP135]] +; CHECK-NEXT: [[TMP137:%.*]] = add i64 [[SHADOW_PTR_INT5]], 16 +; CHECK-NEXT: [[TMP138:%.*]] = inttoptr i64 [[TMP137]] to ptr +; CHECK-NEXT: [[TMP139:%.*]] = load ptr, ptr [[TMP138]], align 8 +; CHECK-NEXT: [[TMP140:%.*]] = ptrtoint ptr [[TMP139]] to i64 +; CHECK-NEXT: [[TMP141:%.*]] = icmp sge i64 [[TMP140]], 0 +; CHECK-NEXT: [[TMP142:%.*]] = or i1 [[TMP136]], [[TMP141]] +; CHECK-NEXT: [[TMP143:%.*]] = add i64 [[SHADOW_PTR_INT5]], 24 +; CHECK-NEXT: [[TMP144:%.*]] = inttoptr i64 [[TMP143]] to ptr +; CHECK-NEXT: [[TMP145:%.*]] = load ptr, ptr [[TMP144]], align 8 +; CHECK-NEXT: [[TMP146:%.*]] = ptrtoint ptr [[TMP145]] to i64 +; CHECK-NEXT: [[TMP147:%.*]] = icmp sge i64 [[TMP146]], 0 +; CHECK-NEXT: [[TMP148:%.*]] = or i1 [[TMP142]], [[TMP147]] +; CHECK-NEXT: [[TMP149:%.*]] = add i64 [[SHADOW_PTR_INT5]], 32 +; CHECK-NEXT: [[TMP150:%.*]] = inttoptr i64 [[TMP149]] to ptr +; CHECK-NEXT: [[TMP151:%.*]] = load ptr, ptr [[TMP150]], align 8 +; CHECK-NEXT: [[TMP152:%.*]] = ptrtoint ptr [[TMP151]] to i64 +; CHECK-NEXT: [[TMP153:%.*]] = icmp sge i64 [[TMP152]], 0 +; CHECK-NEXT: [[TMP154:%.*]] = or i1 [[TMP148]], [[TMP153]] +; CHECK-NEXT: [[TMP155:%.*]] = add i64 [[SHADOW_PTR_INT5]], 40 +; CHECK-NEXT: [[TMP156:%.*]] = inttoptr i64 [[TMP155]] to ptr +; CHECK-NEXT: [[TMP157:%.*]] = load ptr, ptr [[TMP156]], align 8 +; CHECK-NEXT: [[TMP158:%.*]] = ptrtoint ptr [[TMP157]] to i64 +; CHECK-NEXT: [[TMP159:%.*]] = icmp sge i64 [[TMP158]], 0 +; CHECK-NEXT: [[TMP160:%.*]] = or i1 [[TMP154]], [[TMP159]] +; CHECK-NEXT: [[TMP161:%.*]] = add i64 [[SHADOW_PTR_INT5]], 48 +; CHECK-NEXT: [[TMP162:%.*]] = inttoptr i64 [[TMP161]] to ptr +; CHECK-NEXT: [[TMP163:%.*]] = load ptr, ptr [[TMP162]], align 8 +; CHECK-NEXT: [[TMP164:%.*]] = ptrtoint ptr [[TMP163]] to i64 +; CHECK-NEXT: [[TMP165:%.*]] = icmp sge i64 [[TMP164]], 0 +; CHECK-NEXT: [[TMP166:%.*]] = or i1 [[TMP160]], [[TMP165]] +; CHECK-NEXT: [[TMP167:%.*]] = add i64 [[SHADOW_PTR_INT5]], 56 +; CHECK-NEXT: [[TMP168:%.*]] = inttoptr i64 [[TMP167]] to ptr +; CHECK-NEXT: [[TMP169:%.*]] = load ptr, ptr [[TMP168]], align 8 +; CHECK-NEXT: [[TMP170:%.*]] = ptrtoint ptr [[TMP169]] to i64 +; CHECK-NEXT: [[TMP171:%.*]] = icmp sge i64 [[TMP170]], 0 +; CHECK-NEXT: [[TMP172:%.*]] = or i1 [[TMP166]], [[TMP171]] +; CHECK-NEXT: br i1 [[TMP172]], label [[TMP173:%.*]], label [[TMP174:%.*]], !prof [[PROF0]] +; CHECK: 173: +; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_shadow_memory_address, i32 8, ptr null, i32 1) +; CHECK-NEXT: br label [[TMP174]] +; CHECK: 174: +; CHECK-NEXT: br label [[TMP175]] +; CHECK: 175: +; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8 +; CHECK-NEXT: call void @__tysan_instrument_with_shadow_update(ptr [[A:%.*]], ptr @__tysan_v1___ZTS1v_o_12, i1 true, i64 4, i32 2) +; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[APP_PTR_MASKED23:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK2]] +; CHECK-NEXT: [[APP_PTR_SHIFTED24:%.*]] = shl i64 [[APP_PTR_MASKED23]], 3 +; CHECK-NEXT: [[SHADOW_PTR_INT25:%.*]] = add i64 [[APP_PTR_SHIFTED24]], [[SHADOW_BASE1]] +; CHECK-NEXT: [[SHADOW_PTR26:%.*]] = inttoptr i64 [[SHADOW_PTR_INT25]] to ptr +; CHECK-NEXT: [[SHADOW_DESC27:%.*]] = load ptr, ptr [[SHADOW_PTR26]], align 8 +; CHECK-NEXT: [[BAD_DESC28:%.*]] = icmp ne ptr [[SHADOW_DESC27]], @__tysan_v1___ZTS1v_o_12 +; CHECK-NEXT: br i1 
[[BAD_DESC28]], label [[TMP176:%.*]], label [[TMP198:%.*]], !prof [[PROF0]] +; CHECK: 176: +; CHECK-NEXT: [[TMP177:%.*]] = icmp eq ptr [[SHADOW_DESC27]], null +; CHECK-NEXT: br i1 [[TMP177]], label [[TMP178:%.*]], label [[TMP196:%.*]] +; CHECK: 178: +; CHECK-NEXT: [[TMP179:%.*]] = add i64 [[SHADOW_PTR_INT25]], 8 +; CHECK-NEXT: [[TMP180:%.*]] = inttoptr i64 [[TMP179]] to ptr +; CHECK-NEXT: [[TMP181:%.*]] = load ptr, ptr [[TMP180]], align 8 +; CHECK-NEXT: [[TMP182:%.*]] = icmp ne ptr [[TMP181]], null +; CHECK-NEXT: [[TMP183:%.*]] = or i1 false, [[TMP182]] +; CHECK-NEXT: [[TMP184:%.*]] = add i64 [[SHADOW_PTR_INT25]], 16 +; CHECK-NEXT: [[TMP185:%.*]] = inttoptr i64 [[TMP184]] to ptr +; CHECK-NEXT: [[TMP186:%.*]] = load ptr, ptr [[TMP185]], align 8 +; CHECK-NEXT: [[TMP187:%.*]] = icmp ne ptr [[TMP186]], null +; CHECK-NEXT: [[TMP188:%.*]] = or i1 [[TMP183]], [[TMP187]] +; CHECK-NEXT: [[TMP189:%.*]] = add i64 [[SHADOW_PTR_INT25]], 24 +; CHECK-NEXT: [[TMP190:%.*]] = inttoptr i64 [[TMP189]] to ptr +; CHECK-NEXT: [[TMP191:%.*]] = load ptr, ptr [[TMP190]], align 8 +; CHECK-NEXT: [[TMP192:%.*]] = icmp ne ptr [[TMP191]], null +; CHECK-NEXT: [[TMP193:%.*]] = or i1 [[TMP188]], [[TMP192]] +; CHECK-NEXT: br i1 [[TMP193]], label [[TMP194:%.*]], label [[TMP195:%.*]], !prof [[PROF0]] +; CHECK: 194: +; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTS1v_o_12, i32 2) +; CHECK-NEXT: br label [[TMP195]] +; CHECK: 195: +; CHECK-NEXT: store ptr @__tysan_v1___ZTS1v_o_12, ptr [[SHADOW_PTR26]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET29:%.*]] = add i64 [[SHADOW_PTR_INT25]], 8 +; CHECK-NEXT: [[SHADOW_BYTE_1_PTR30:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET29]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -1 to ptr), ptr [[SHADOW_BYTE_1_PTR30]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_2_OFFSET31:%.*]] = add i64 [[SHADOW_PTR_INT25]], 16 +; CHECK-NEXT: [[SHADOW_BYTE_2_PTR32:%.*]] = inttoptr i64 [[SHADOW_BYTE_2_OFFSET31]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -2 to ptr), ptr [[SHADOW_BYTE_2_PTR32]], align 8 +; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET33:%.*]] = add i64 [[SHADOW_PTR_INT25]], 24 +; CHECK-NEXT: [[SHADOW_BYTE_3_PTR34:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET33]] to ptr +; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR34]], align 8 +; CHECK-NEXT: br label [[TMP197:%.*]] +; CHECK: 196: +; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTS1v_o_12, i32 2) +; CHECK-NEXT: br label [[TMP197]] +; CHECK: 197: +; CHECK-NEXT: br label [[TMP219:%.*]] +; CHECK: 198: +; CHECK-NEXT: [[TMP199:%.*]] = add i64 [[SHADOW_PTR_INT25]], 8 +; CHECK-NEXT: [[TMP200:%.*]] = inttoptr i64 [[TMP199]] to ptr +; CHECK-NEXT: [[TMP201:%.*]] = load ptr, ptr [[TMP200]], align 8 +; CHECK-NEXT: [[TMP202:%.*]] = ptrtoint ptr [[TMP201]] to i64 +; CHECK-NEXT: [[TMP203:%.*]] = icmp sge i64 [[TMP202]], 0 +; CHECK-NEXT: [[TMP204:%.*]] = or i1 false, [[TMP203]] +; CHECK-NEXT: [[TMP205:%.*]] = add i64 [[SHADOW_PTR_INT25]], 16 +; CHECK-NEXT: [[TMP206:%.*]] = inttoptr i64 [[TMP205]] to ptr +; CHECK-NEXT: [[TMP207:%.*]] = load ptr, ptr [[TMP206]], align 8 +; CHECK-NEXT: [[TMP208:%.*]] = ptrtoint ptr [[TMP207]] to i64 +; CHECK-NEXT: [[TMP209:%.*]] = icmp sge i64 [[TMP208]], 0 +; CHECK-NEXT: [[TMP210:%.*]] = or i1 [[TMP204]], [[TMP209]] +; CHECK-NEXT: [[TMP211:%.*]] = add i64 [[SHADOW_PTR_INT25]], 24 +; CHECK-NEXT: [[TMP212:%.*]] = inttoptr i64 [[TMP211]] to ptr +; CHECK-NEXT: [[TMP213:%.*]] = load ptr, ptr [[TMP212]], align 8 +; CHECK-NEXT: [[TMP214:%.*]] = ptrtoint ptr 
[[TMP213]] to i64 +; CHECK-NEXT: [[TMP215:%.*]] = icmp sge i64 [[TMP214]], 0 +; CHECK-NEXT: [[TMP216:%.*]] = or i1 [[TMP210]], [[TMP215]] +; CHECK-NEXT: br i1 [[TMP216]], label [[TMP217:%.*]], label [[TMP218:%.*]], !prof [[PROF0]] +; CHECK: 217: +; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTS1v_o_12, i32 2) +; CHECK-NEXT: br label [[TMP218]] +; CHECK: 218: +; CHECK-NEXT: br label [[TMP219]] +; CHECK: 219: +; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: ret void +; +entry: + store i32 42, ptr %a, align 4, !tbaa !6 + ret void +} + +!0 = !{!"Simple C++ TBAA"} +!1 = !{!"omnipotent char", !0, i64 0} +!2 = !{!"int", !1, i64 0} +!3 = !{!2, !2, i64 0} +!4 = !{!"_ZTS1x", !2, i64 0, !2, i64 4} +!5 = !{!"_ZTS1v", !2, i64 8, !2, i64 12, !4, i64 16} +!6 = !{!5, !2, i64 12} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { sanitize_type } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind } +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 100000} +; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C++ TBAA"} +; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META2]], i64 12} +; CHECK: [[META6]] = !{!"_ZTS1v", [[META2]], i64 8, [[META2]], i64 12, [[META7:![0-9]+]], i64 16} +; CHECK: [[META7]] = !{!"_ZTS1x", [[META2]], i64 0, [[META2]], i64 4} +;. diff --git a/llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll b/llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll new file mode 100644 index 0000000..0bd7940 --- /dev/null +++ b/llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs +; RUN: opt -passes='tysan' -tysan-outline-instrumentation -S %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@global1 = global i32 0, align 4 +@global2 = global i32 0, align 4 + + +; CHECK-LABEL: define internal void @__tysan_set_globals_types( +; CHECK-NEXT: %app.mem.mask = load i64, ptr @__tysan_app_memory_mask, align 8 +; CHECK-NEXT: %shadow.base = load i64, ptr @__tysan_shadow_memory_address, align 8 +; CHECK-NEXT: call void @__tysan_set_shadow_type(ptr @global1, ptr @__tysan_v1_int, i64 4) +; CHECK-NEXT: call void @__tysan_set_shadow_type(ptr @global1, ptr @__tysan_v1_int, i64 4) +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +!llvm.tysan.globals = !{!13, !14} + +!0 = !{!"Simple C++ TBAA"} +!1 = !{!"omnipotent char", !0, i64 0} +!2 = !{!"int", !1, i64 0} +!13 = !{ptr @global1, !2} +!14 = !{ptr @global1, !2} diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vimage.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vimage.s index fec8ba1..0a480a7 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vimage.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vimage.s @@ -2,33 +2,33 @@ ; RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s tensor_load_to_lds s[0:3], s[4:11] -// GFX1250: tensor_load_to_lds s[0:3], s[4:11] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c] +// GFX1250: tensor_load_to_lds s[0:3], s[4:11] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_load_to_lds s[0:3], s[4:11] 
th:TH_LOAD_BYPASS scope:SCOPE_SYS -// GFX1250: tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c] +// GFX1250: tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] -// GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10] +// GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV -// GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10] +// GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_store_from_lds s[0:3], s[4:11] -// GFX1250: tensor_store_from_lds s[0:3], s[4:11] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c] +// GFX1250: tensor_store_from_lds s[0:3], s[4:11] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS -// GFX1250: tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c] +// GFX1250: tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] -// GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10] +// GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV -// GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10] +// GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vimage.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vimage.txt index 9afaa07..8005793 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vimage.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vimage.txt @@ -1,25 +1,25 @@ # RUN: llvm-mc -disassemble -triple=amdgcn -mcpu=gfx1250 
-show-encoding %s | FileCheck --check-prefix=GFX1250 %s -# GFX1250: tensor_load_to_lds s[0:3], s[4:11] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c] -0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c +# GFX1250: tensor_load_to_lds s[0:3], s[4:11] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c] +0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c -# GFX1250: tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c] -0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c +# GFX1250: tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c] +0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c -# GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10] -0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10 +# GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10] +0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10 -# GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10] -0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10 +# GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10] +0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10 -# GFX1250: tensor_store_from_lds s[0:3], s[4:11] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c] -0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c +# GFX1250: tensor_store_from_lds s[0:3], s[4:11] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c] +0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c -# GFX1250: tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c] -0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c +# GFX1250: tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c] +0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c -# GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10] -0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10 +# GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10] +0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10 -# GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10] -0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10 +# GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10] +0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10 diff --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt index 
57e3153..5c2927a 100755 --- a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt +++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt @@ -1,70 +1,6 @@ # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s -check-prefix=ATT # RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s -check-prefix=INTEL -# ATT: t2rpntlvwz0rs 268435456(%rbp,%r14,8), %tmm6 -# INTEL: t2rpntlvwz0rs tmm6, [rbp + 8*r14 + 268435456] -0xc4,0xa5,0x78,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz0rs 291(%r8,%rax,4), %tmm2 -# INTEL: t2rpntlvwz0rs tmm2, [r8 + 4*rax + 291] -0xc4,0xc5,0x78,0xf8,0x94,0x80,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz0rs 64(%rbx), %tmm6 -# INTEL: t2rpntlvwz0rs tmm6, [rbx + 64] -0xc4,0xe5,0x78,0xf8,0x74,0x23,0x40 - -# ATT: t2rpntlvwz0rs -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz0rs tmm2, [2*rbp - 32] -0xc4,0xe5,0x78,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: t2rpntlvwz0rst1 268435456(%rbp,%r14,8), %tmm6 -# INTEL: t2rpntlvwz0rst1 tmm6, [rbp + 8*r14 + 268435456] -0xc4,0xa5,0x78,0xf9,0xb4,0xf5,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz0rst1 291(%r8,%rax,4), %tmm2 -# INTEL: t2rpntlvwz0rst1 tmm2, [r8 + 4*rax + 291] -0xc4,0xc5,0x78,0xf9,0x94,0x80,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz0rst1 64(%rbx), %tmm6 -# INTEL: t2rpntlvwz0rst1 tmm6, [rbx + 64] -0xc4,0xe5,0x78,0xf9,0x74,0x23,0x40 - -# ATT: t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz0rst1 tmm2, [2*rbp - 32] -0xc4,0xe5,0x78,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: t2rpntlvwz1rs 268435456(%rbp,%r14,8), %tmm6 -# INTEL: t2rpntlvwz1rs tmm6, [rbp + 8*r14 + 268435456] -0xc4,0xa5,0x79,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz1rs 291(%r8,%rax,4), %tmm2 -# INTEL: t2rpntlvwz1rs tmm2, [r8 + 4*rax + 291] -0xc4,0xc5,0x79,0xf8,0x94,0x80,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz1rs 64(%rbx), %tmm6 -# INTEL: t2rpntlvwz1rs tmm6, [rbx + 64] -0xc4,0xe5,0x79,0xf8,0x74,0x23,0x40 - -# ATT: t2rpntlvwz1rs -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz1rs tmm2, [2*rbp - 32] -0xc4,0xe5,0x79,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: t2rpntlvwz1rst1 268435456(%rbp,%r14,8), %tmm6 -# INTEL: t2rpntlvwz1rst1 tmm6, [rbp + 8*r14 + 268435456] -0xc4,0xa5,0x79,0xf9,0xb4,0xf5,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz1rst1 291(%r8,%rax,4), %tmm2 -# INTEL: t2rpntlvwz1rst1 tmm2, [r8 + 4*rax + 291] -0xc4,0xc5,0x79,0xf9,0x94,0x80,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz1rst1 64(%rbx), %tmm6 -# INTEL: t2rpntlvwz1rst1 tmm6, [rbx + 64] -0xc4,0xe5,0x79,0xf9,0x74,0x23,0x40 - -# ATT: t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz1rst1 tmm2, [2*rbp - 32] -0xc4,0xe5,0x79,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff - # ATT: tileloaddrs 268435456(%rbp,%r14,8), %tmm6 # INTEL: tileloaddrs tmm6, [rbp + 8*r14 + 268435456] 0xc4,0xa2,0x7b,0x4a,0xb4,0xf5,0x00,0x00,0x00,0x10 @@ -97,70 +33,6 @@ # INTEL: tileloaddrst1 tmm3, [2*rbp - 32] 0xc4,0xe2,0x79,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff -# ATT: t2rpntlvwz0rs 268435456(%r16,%r14,8), %tmm6 -# INTEL: t2rpntlvwz0rs tmm6, [r16 + 8*r14 + 268435456] -0x62,0xbd,0x7c,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz0rs 291(%r8,%r17,4), %tmm2 -# INTEL: t2rpntlvwz0rs tmm2, [r8 + 4*r17 + 291] -0x62,0xd5,0x78,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz0rs 64(%r18), %tmm6 -# INTEL: t2rpntlvwz0rs tmm6, [r18 + 64] -0x62,0xfd,0x7c,0x08,0xf8,0x74,0x22,0x40 - -# ATT: t2rpntlvwz0rs -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz0rs tmm2, [2*rbp - 32] -0x62,0xf5,0x7c,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: t2rpntlvwz0rst1 268435456(%r16,%r14,8), %tmm6 
-# INTEL: t2rpntlvwz0rst1 tmm6, [r16 + 8*r14 + 268435456] -0x62,0xbd,0x7c,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz0rst1 291(%r8,%r17,4), %tmm2 -# INTEL: t2rpntlvwz0rst1 tmm2, [r8 + 4*r17 + 291] -0x62,0xd5,0x78,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz0rst1 64(%r18), %tmm6 -# INTEL: t2rpntlvwz0rst1 tmm6, [r18 + 64] -0x62,0xfd,0x7c,0x08,0xf9,0x74,0x22,0x40 - -# ATT: t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz0rst1 tmm2, [2*rbp - 32] -0x62,0xf5,0x7c,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: t2rpntlvwz1rs 268435456(%r16,%r14,8), %tmm6 -# INTEL: t2rpntlvwz1rs tmm6, [r16 + 8*r14 + 268435456] -0x62,0xbd,0x7d,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz1rs 291(%r8,%r17,4), %tmm2 -# INTEL: t2rpntlvwz1rs tmm2, [r8 + 4*r17 + 291] -0x62,0xd5,0x79,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz1rs 64(%r18), %tmm6 -# INTEL: t2rpntlvwz1rs tmm6, [r18 + 64] -0x62,0xfd,0x7d,0x08,0xf8,0x74,0x22,0x40 - -# ATT: t2rpntlvwz1rs -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz1rs tmm2, [2*rbp - 32] -0x62,0xf5,0x7d,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: t2rpntlvwz1rst1 268435456(%r16,%r14,8), %tmm6 -# INTEL: t2rpntlvwz1rst1 tmm6, [r16 + 8*r14 + 268435456] -0x62,0xbd,0x7d,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz1rst1 291(%r8,%r17,4), %tmm2 -# INTEL: t2rpntlvwz1rst1 tmm2, [r8 + 4*r17 + 291] -0x62,0xd5,0x79,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz1rst1 64(%r18), %tmm6 -# INTEL: t2rpntlvwz1rst1 tmm6, [r18 + 64] -0x62,0xfd,0x7d,0x08,0xf9,0x74,0x22,0x40 - -# ATT: t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz1rst1 tmm2, [2*rbp - 32] -0x62,0xf5,0x7d,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff - # ATT: tileloaddrs 268435456(%r16,%r14,8), %tmm6 # INTEL: tileloaddrs tmm6, [r16 + 8*r14 + 268435456] 0x62,0xba,0x7f,0x08,0x4a,0xb4,0xf0,0x00,0x00,0x00,0x10 diff --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tf32.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tf32.txt index f372c42..347e61c 100644 --- a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tf32.txt +++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tf32.txt @@ -9,11 +9,3 @@ # INTEL: tmmultf32ps tmm3, tmm2, tmm1 0xc4,0xe2,0x71,0x48,0xda -# ATT: ttmmultf32ps %tmm4, %tmm5, %tmm6 -# INTEL: ttmmultf32ps tmm6, tmm5, tmm4 -0xc4,0xe2,0x58,0x48,0xf5 - -# ATT: ttmmultf32ps %tmm1, %tmm2, %tmm3 -# INTEL: ttmmultf32ps tmm3, tmm2, tmm1 -0xc4,0xe2,0x70,0x48,0xda - diff --git a/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt b/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt deleted file mode 100644 index d768630..0000000 --- a/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt +++ /dev/null @@ -1,154 +0,0 @@ -# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT -# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL - -# ATT: t2rpntlvwz0 268435456(%rbp,%r14,8), %tmm4 -# INTEL: t2rpntlvwz0 tmm4, [rbp + 8*r14 + 268435456] -0xc4,0xa2,0x78,0x6e,0xa4,0xf5,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz0 291(%r8,%rax,4), %tmm2 -# INTEL: t2rpntlvwz0 tmm2, [r8 + 4*rax + 291] -0xc4,0xc2,0x78,0x6e,0x94,0x80,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz0 -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz0 tmm2, [2*rbp - 32] -0xc4,0xe2,0x78,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: t2rpntlvwz0t1 268435456(%rbp,%r14,8), %tmm4 -# INTEL: t2rpntlvwz0t1 tmm4, [rbp + 8*r14 + 268435456] -0xc4,0xa2,0x78,0x6f,0xa4,0xf5,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz0t1 
291(%r8,%rax,4), %tmm2 -# INTEL: t2rpntlvwz0t1 tmm2, [r8 + 4*rax + 291] -0xc4,0xc2,0x78,0x6f,0x94,0x80,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz0t1 -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz0t1 tmm2, [2*rbp - 32] -0xc4,0xe2,0x78,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: t2rpntlvwz1 268435456(%rbp,%r14,8), %tmm4 -# INTEL: t2rpntlvwz1 tmm4, [rbp + 8*r14 + 268435456] -0xc4,0xa2,0x79,0x6e,0xa4,0xf5,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz1 291(%r8,%rax,4), %tmm2 -# INTEL: t2rpntlvwz1 tmm2, [r8 + 4*rax + 291] -0xc4,0xc2,0x79,0x6e,0x94,0x80,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz1 -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz1 tmm2, [2*rbp - 32] -0xc4,0xe2,0x79,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: t2rpntlvwz1t1 268435456(%rbp,%r14,8), %tmm4 -# INTEL: t2rpntlvwz1t1 tmm4, [rbp + 8*r14 + 268435456] -0xc4,0xa2,0x79,0x6f,0xa4,0xf5,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz1t1 291(%r8,%rax,4), %tmm2 -# INTEL: t2rpntlvwz1t1 tmm2, [r8 + 4*rax + 291] -0xc4,0xc2,0x79,0x6f,0x94,0x80,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz1t1 -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz1t1 tmm2, [2*rbp - 32] -0xc4,0xe2,0x79,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: t2rpntlvwz0 268435456(%r16,%r14,8), %tmm4 -# INTEL: t2rpntlvwz0 tmm4, [r16 + 8*r14 + 268435456] -0x62,0xba,0x7c,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz0 291(%r8,%r17,4), %tmm2 -# INTEL: t2rpntlvwz0 tmm2, [r8 + 4*r17 + 291] -0x62,0xd2,0x78,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz0 -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz0 tmm2, [2*rbp - 32] -0x62,0xf2,0x7c,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: t2rpntlvwz0t1 268435456(%r16,%r14,8), %tmm4 -# INTEL: t2rpntlvwz0t1 tmm4, [r16 + 8*r14 + 268435456] -0x62,0xba,0x7c,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz0t1 291(%r8,%r17,4), %tmm2 -# INTEL: t2rpntlvwz0t1 tmm2, [r8 + 4*r17 + 291] -0x62,0xd2,0x78,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz0t1 -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz0t1 tmm2, [2*rbp - 32] -0x62,0xf2,0x7c,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: t2rpntlvwz1 268435456(%r16,%r14,8), %tmm4 -# INTEL: t2rpntlvwz1 tmm4, [r16 + 8*r14 + 268435456] -0x62,0xba,0x7d,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz1 291(%r8,%r17,4), %tmm2 -# INTEL: t2rpntlvwz1 tmm2, [r8 + 4*r17 + 291] -0x62,0xd2,0x79,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz1 -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz1 tmm2, [2*rbp - 32] -0x62,0xf2,0x7d,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: t2rpntlvwz1t1 268435456(%r16,%r14,8), %tmm4 -# INTEL: t2rpntlvwz1t1 tmm4, [r16 + 8*r14 + 268435456] -0x62,0xba,0x7d,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10 - -# ATT: t2rpntlvwz1t1 291(%r8,%r17,4), %tmm2 -# INTEL: t2rpntlvwz1t1 tmm2, [r8 + 4*r17 + 291] -0x62,0xd2,0x79,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00 - -# ATT: t2rpntlvwz1t1 -32(,%rbp,2), %tmm2 -# INTEL: t2rpntlvwz1t1 tmm2, [2*rbp - 32] -0x62,0xf2,0x7d,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff - -# ATT: ttransposed %tmm1, %tmm2 -# INTEL: ttransposed tmm2, tmm1 -0xc4,0xe2,0x7a,0x5f,0xd1 - -# ATT: ttransposed %tmm2, %tmm3 -# INTEL: ttransposed tmm3, tmm2 -0xc4,0xe2,0x7a,0x5f,0xda - -# ATT: ttdpbf16ps %tmm7, %tmm6, %tmm5 -# INTEL: ttdpbf16ps tmm5, tmm6, tmm7 -0xc4,0xe2,0x42,0x6c,0xee - -# ATT: ttdpbf16ps %tmm1, %tmm2, %tmm3 -# INTEL: ttdpbf16ps tmm3, tmm2, tmm1 -0xc4,0xe2,0x72,0x6c,0xda - -# ATT: ttdpfp16ps %tmm7, %tmm6, %tmm5 -# INTEL: ttdpfp16ps tmm5, tmm6, tmm7 -0xc4,0xe2,0x43,0x6c,0xee - -# ATT: ttdpfp16ps %tmm1, %tmm2, %tmm3 -# INTEL: ttdpfp16ps tmm3, tmm2, tmm1 
-0xc4,0xe2,0x73,0x6c,0xda - -# ATT: ttcmmimfp16ps %tmm4, %tmm5, %tmm6 -# INTEL: ttcmmimfp16ps tmm6, tmm5, tmm4 -0xc4,0xe2,0x5b,0x6b,0xf5 - -# ATT: ttcmmimfp16ps %tmm1, %tmm2, %tmm3 -# INTEL: ttcmmimfp16ps tmm3, tmm2, tmm1 -0xc4,0xe2,0x73,0x6b,0xda - -# ATT: ttcmmrlfp16ps %tmm4, %tmm5, %tmm6 -# INTEL: ttcmmrlfp16ps tmm6, tmm5, tmm4 -0xc4,0xe2,0x5a,0x6b,0xf5 - -# ATT: ttcmmrlfp16ps %tmm1, %tmm2, %tmm3 -# INTEL: ttcmmrlfp16ps tmm3, tmm2, tmm1 -0xc4,0xe2,0x72,0x6b,0xda - -# ATT: tconjtcmmimfp16ps %tmm4, %tmm5, %tmm6 -# INTEL: tconjtcmmimfp16ps tmm6, tmm5, tmm4 -0xc4,0xe2,0x58,0x6b,0xf5 - -# ATT: tconjtcmmimfp16ps %tmm1, %tmm2, %tmm3 -# INTEL: tconjtcmmimfp16ps tmm3, tmm2, tmm1 -0xc4,0xe2,0x70,0x6b,0xda - -# ATT: tconjtfp16 %tmm5, %tmm6 -# INTEL: tconjtfp16 tmm6, tmm5 -0xc4,0xe2,0x79,0x6b,0xf5 - -# ATT: tconjtfp16 %tmm2, %tmm3 -# INTEL: tconjtfp16 tmm3, tmm2 -0xc4,0xe2,0x79,0x6b,0xda diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s index 92db672..497a1c6 100755 --- a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s +++ b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s @@ -1,69 +1,5 @@ // RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s -// CHECK: t2rpntlvwz0rs 268435456(%rbp,%r14,8), %tmm6 -// CHECK: encoding: [0xc4,0xa5,0x78,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz0rs 268435456(%rbp,%r14,8), %tmm6 - -// CHECK: t2rpntlvwz0rs 291(%r8,%rax,4), %tmm2 -// CHECK: encoding: [0xc4,0xc5,0x78,0xf8,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz0rs 291(%r8,%rax,4), %tmm2 - -// CHECK: t2rpntlvwz0rs 64(%rbx), %tmm6 -// CHECK: encoding: [0xc4,0xe5,0x78,0xf8,0x74,0x23,0x40] - t2rpntlvwz0rs 64(%rbx), %tmm6 - -// CHECK: t2rpntlvwz0rs -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0xc4,0xe5,0x78,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz0rs -32(,%rbp,2), %tmm2 - -// CHECK: t2rpntlvwz0rst1 268435456(%rbp,%r14,8), %tmm6 -// CHECK: encoding: [0xc4,0xa5,0x78,0xf9,0xb4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz0rst1 268435456(%rbp,%r14,8), %tmm6 - -// CHECK: t2rpntlvwz0rst1 291(%r8,%rax,4), %tmm2 -// CHECK: encoding: [0xc4,0xc5,0x78,0xf9,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz0rst1 291(%r8,%rax,4), %tmm2 - -// CHECK: t2rpntlvwz0rst1 64(%rbx), %tmm6 -// CHECK: encoding: [0xc4,0xe5,0x78,0xf9,0x74,0x23,0x40] - t2rpntlvwz0rst1 64(%rbx), %tmm6 - -// CHECK: t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2 - -// CHECK: t2rpntlvwz1rs 268435456(%rbp,%r14,8), %tmm6 -// CHECK: encoding: [0xc4,0xa5,0x79,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz1rs 268435456(%rbp,%r14,8), %tmm6 - -// CHECK: t2rpntlvwz1rs 291(%r8,%rax,4), %tmm2 -// CHECK: encoding: [0xc4,0xc5,0x79,0xf8,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz1rs 291(%r8,%rax,4), %tmm2 - -// CHECK: t2rpntlvwz1rs 64(%rbx), %tmm6 -// CHECK: encoding: [0xc4,0xe5,0x79,0xf8,0x74,0x23,0x40] - t2rpntlvwz1rs 64(%rbx), %tmm6 - -// CHECK: t2rpntlvwz1rs -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0xc4,0xe5,0x79,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz1rs -32(,%rbp,2), %tmm2 - -// CHECK: t2rpntlvwz1rst1 268435456(%rbp,%r14,8), %tmm6 -// CHECK: encoding: [0xc4,0xa5,0x79,0xf9,0xb4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz1rst1 268435456(%rbp,%r14,8), %tmm6 - -// CHECK: t2rpntlvwz1rst1 291(%r8,%rax,4), %tmm2 -// CHECK: encoding: [0xc4,0xc5,0x79,0xf9,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz1rst1 291(%r8,%rax,4), %tmm2 - -// CHECK: t2rpntlvwz1rst1 64(%rbx), %tmm6 -// CHECK: encoding: 
[0xc4,0xe5,0x79,0xf9,0x74,0x23,0x40] - t2rpntlvwz1rst1 64(%rbx), %tmm6 - -// CHECK: t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2 - // CHECK: tileloaddrs 268435456(%rbp,%r14,8), %tmm6 // CHECK: encoding: [0xc4,0xa2,0x7b,0x4a,0xb4,0xf5,0x00,0x00,0x00,0x10] tileloaddrs 268435456(%rbp,%r14,8), %tmm6 @@ -88,70 +24,6 @@ // CHECK: encoding: [0xc4,0xe2,0x79,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff] tileloaddrst1 -32(,%rbp,2), %tmm3 -// CHECK: t2rpntlvwz0rs 268435456(%r16,%r14,8), %tmm6 -// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz0rs 268435456(%r16,%r14,8), %tmm6 - -// CHECK: t2rpntlvwz0rs 291(%r8,%r17,4), %tmm2 -// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz0rs 291(%r8,%r17,4), %tmm2 - -// CHECK: t2rpntlvwz0rs 64(%r18), %tmm6 -// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf8,0x74,0x22,0x40] - t2rpntlvwz0rs 64(%r18), %tmm6 - -// CHECK: {evex} t2rpntlvwz0rs -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz0rs -32(,%rbp,2), %tmm2 - -// CHECK: t2rpntlvwz0rst1 268435456(%r16,%r14,8), %tmm6 -// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz0rst1 268435456(%r16,%r14,8), %tmm6 - -// CHECK: t2rpntlvwz0rst1 291(%r8,%r17,4), %tmm2 -// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz0rst1 291(%r8,%r17,4), %tmm2 - -// CHECK: t2rpntlvwz0rst1 64(%r18), %tmm6 -// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf9,0x74,0x22,0x40] - t2rpntlvwz0rst1 64(%r18), %tmm6 - -// CHECK: {evex} t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2 - -// CHECK: t2rpntlvwz1rs 268435456(%r16,%r14,8), %tmm6 -// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz1rs 268435456(%r16,%r14,8), %tmm6 - -// CHECK: t2rpntlvwz1rs 291(%r8,%r17,4), %tmm2 -// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz1rs 291(%r8,%r17,4), %tmm2 - -// CHECK: t2rpntlvwz1rs 64(%r18), %tmm6 -// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf8,0x74,0x22,0x40] - t2rpntlvwz1rs 64(%r18), %tmm6 - -// CHECK: {evex} t2rpntlvwz1rs -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz1rs -32(,%rbp,2), %tmm2 - -// CHECK: t2rpntlvwz1rst1 268435456(%r16,%r14,8), %tmm6 -// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz1rst1 268435456(%r16,%r14,8), %tmm6 - -// CHECK: t2rpntlvwz1rst1 291(%r8,%r17,4), %tmm2 -// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz1rst1 291(%r8,%r17,4), %tmm2 - -// CHECK: t2rpntlvwz1rst1 64(%r18), %tmm6 -// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf9,0x74,0x22,0x40] - t2rpntlvwz1rst1 64(%r18), %tmm6 - -// CHECK: {evex} t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2 - // CHECK: tileloaddrs 291(%r16,%rax,4), %tmm3 // CHECK: encoding: [0x62,0xfa,0x7f,0x08,0x4a,0x9c,0x80,0x23,0x01,0x00,0x00] tileloaddrs 291(%r16,%rax,4), %tmm3 diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s index 140d1aa..0e030ca 100755 --- a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s +++ 
b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s @@ -1,69 +1,5 @@ // RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s -// CHECK: t2rpntlvwz0rs tmm6, [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0xc4,0xa5,0x78,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz0rs tmm6, [rbp + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz0rs tmm2, [r8 + 4*rax + 291] -// CHECK: encoding: [0xc4,0xc5,0x78,0xf8,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz0rs tmm2, [r8 + 4*rax + 291] - -// CHECK: t2rpntlvwz0rs tmm6, [rbx + 64] -// CHECK: encoding: [0xc4,0xe5,0x78,0xf8,0x74,0x23,0x40] - t2rpntlvwz0rs tmm6, [rbx + 64] - -// CHECK: t2rpntlvwz0rs tmm2, [2*rbp - 32] -// CHECK: encoding: [0xc4,0xe5,0x78,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz0rs tmm2, [2*rbp - 32] - -// CHECK: t2rpntlvwz0rst1 tmm6, [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0xc4,0xa5,0x78,0xf9,0xb4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz0rst1 tmm6, [rbp + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz0rst1 tmm2, [r8 + 4*rax + 291] -// CHECK: encoding: [0xc4,0xc5,0x78,0xf9,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz0rst1 tmm2, [r8 + 4*rax + 291] - -// CHECK: t2rpntlvwz0rst1 tmm6, [rbx + 64] -// CHECK: encoding: [0xc4,0xe5,0x78,0xf9,0x74,0x23,0x40] - t2rpntlvwz0rst1 tmm6, [rbx + 64] - -// CHECK: t2rpntlvwz0rst1 tmm2, [2*rbp - 32] -// CHECK: encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz0rst1 tmm2, [2*rbp - 32] - -// CHECK: t2rpntlvwz1rs tmm6, [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0xc4,0xa5,0x79,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz1rs tmm6, [rbp + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz1rs tmm2, [r8 + 4*rax + 291] -// CHECK: encoding: [0xc4,0xc5,0x79,0xf8,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz1rs tmm2, [r8 + 4*rax + 291] - -// CHECK: t2rpntlvwz1rs tmm6, [rbx + 64] -// CHECK: encoding: [0xc4,0xe5,0x79,0xf8,0x74,0x23,0x40] - t2rpntlvwz1rs tmm6, [rbx + 64] - -// CHECK: t2rpntlvwz1rs tmm2, [2*rbp - 32] -// CHECK: encoding: [0xc4,0xe5,0x79,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz1rs tmm2, [2*rbp - 32] - -// CHECK: t2rpntlvwz1rst1 tmm6, [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0xc4,0xa5,0x79,0xf9,0xb4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz1rst1 tmm6, [rbp + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz1rst1 tmm2, [r8 + 4*rax + 291] -// CHECK: encoding: [0xc4,0xc5,0x79,0xf9,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz1rst1 tmm2, [r8 + 4*rax + 291] - -// CHECK: t2rpntlvwz1rst1 tmm6, [rbx + 64] -// CHECK: encoding: [0xc4,0xe5,0x79,0xf9,0x74,0x23,0x40] - t2rpntlvwz1rst1 tmm6, [rbx + 64] - -// CHECK: t2rpntlvwz1rst1 tmm2, [2*rbp - 32] -// CHECK: encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz1rst1 tmm2, [2*rbp - 32] - // CHECK: tileloaddrs tmm6, [rbp + 8*r14 + 268435456] // CHECK: encoding: [0xc4,0xa2,0x7b,0x4a,0xb4,0xf5,0x00,0x00,0x00,0x10] tileloaddrs tmm6, [rbp + 8*r14 + 268435456] @@ -96,70 +32,6 @@ // CHECK: encoding: [0xc4,0xe2,0x79,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff] tileloaddrst1 tmm3, [2*rbp - 32] -// CHECK: t2rpntlvwz0rs tmm6, [r16 + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz0rs tmm6, [r16 + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz0rs tmm2, [r8 + 4*r17 + 291] -// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz0rs tmm2, [r8 + 4*r17 + 291] - -// CHECK: t2rpntlvwz0rs tmm6, [r18 + 64] -// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf8,0x74,0x22,0x40] - t2rpntlvwz0rs tmm6, [r18 + 64] - 
-// CHECK: {evex} t2rpntlvwz0rs tmm2, [2*rbp - 32] -// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz0rs tmm2, [2*rbp - 32] - -// CHECK: t2rpntlvwz0rst1 tmm6, [r16 + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz0rst1 tmm6, [r16 + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz0rst1 tmm2, [r8 + 4*r17 + 291] -// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz0rst1 tmm2, [r8 + 4*r17 + 291] - -// CHECK: t2rpntlvwz0rst1 tmm6, [r18 + 64] -// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf9,0x74,0x22,0x40] - t2rpntlvwz0rst1 tmm6, [r18 + 64] - -// CHECK: {evex} t2rpntlvwz0rst1 tmm2, [2*rbp - 32] -// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz0rst1 tmm2, [2*rbp - 32] - -// CHECK: t2rpntlvwz1rs tmm6, [r16 + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz1rs tmm6, [r16 + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz1rs tmm2, [r8 + 4*r17 + 291] -// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz1rs tmm2, [r8 + 4*r17 + 291] - -// CHECK: t2rpntlvwz1rs tmm6, [r18 + 64] -// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf8,0x74,0x22,0x40] - t2rpntlvwz1rs tmm6, [r18 + 64] - -// CHECK: {evex} t2rpntlvwz1rs tmm2, [2*rbp - 32] -// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz1rs tmm2, [2*rbp - 32] - -// CHECK: t2rpntlvwz1rst1 tmm6, [r16 + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz1rst1 tmm6, [r16 + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz1rst1 tmm2, [r8 + 4*r17 + 291] -// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz1rst1 tmm2, [r8 + 4*r17 + 291] - -// CHECK: t2rpntlvwz1rst1 tmm6, [r18 + 64] -// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf9,0x74,0x22,0x40] - t2rpntlvwz1rst1 tmm6, [r18 + 64] - -// CHECK: {evex} t2rpntlvwz1rst1 tmm2, [2*rbp - 32] -// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz1rst1 tmm2, [2*rbp - 32] - // CHECK: tileloaddrs tmm6, [r16 + 8*r14 + 268435456] // CHECK: encoding: [0x62,0xba,0x7f,0x08,0x4a,0xb4,0xf0,0x00,0x00,0x00,0x10] tileloaddrs tmm6, [r16 + 8*r14 + 268435456] diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-tf32-att.s b/llvm/test/MC/X86/AMX/x86-64-amx-tf32-att.s index b413597..d1d0997 100644 --- a/llvm/test/MC/X86/AMX/x86-64-amx-tf32-att.s +++ b/llvm/test/MC/X86/AMX/x86-64-amx-tf32-att.s @@ -8,10 +8,3 @@ // CHECK: encoding: [0xc4,0xe2,0x71,0x48,0xda] tmmultf32ps %tmm1, %tmm2, %tmm3 -// CHECK: ttmmultf32ps %tmm4, %tmm5, %tmm6 -// CHECK: encoding: [0xc4,0xe2,0x58,0x48,0xf5] - ttmmultf32ps %tmm4, %tmm5, %tmm6 - -// CHECK: ttmmultf32ps %tmm1, %tmm2, %tmm3 -// CHECK: encoding: [0xc4,0xe2,0x70,0x48,0xda] - ttmmultf32ps %tmm1, %tmm2, %tmm3 diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-tf32-intel.s b/llvm/test/MC/X86/AMX/x86-64-amx-tf32-intel.s index 98f5527..b6c0947 100644 --- a/llvm/test/MC/X86/AMX/x86-64-amx-tf32-intel.s +++ b/llvm/test/MC/X86/AMX/x86-64-amx-tf32-intel.s @@ -8,10 +8,3 @@ // CHECK: encoding: [0xc4,0xe2,0x71,0x48,0xda] tmmultf32ps tmm3, tmm2, tmm1 -// CHECK: ttmmultf32ps tmm6, tmm5, tmm4 -// CHECK: encoding: [0xc4,0xe2,0x58,0x48,0xf5] - ttmmultf32ps tmm6, tmm5, tmm4 - -// CHECK: ttmmultf32ps tmm3, tmm2, tmm1 -// CHECK: encoding: [0xc4,0xe2,0x70,0x48,0xda] - ttmmultf32ps tmm3, tmm2, tmm1 diff --git 
a/llvm/test/MC/X86/amx-transpose-att.s b/llvm/test/MC/X86/amx-transpose-att.s deleted file mode 100644 index 5158470..0000000 --- a/llvm/test/MC/X86/amx-transpose-att.s +++ /dev/null @@ -1,153 +0,0 @@ -// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s - -// CHECK: t2rpntlvwz0 268435456(%rbp,%r14,8), %tmm4 -// CHECK: encoding: [0xc4,0xa2,0x78,0x6e,0xa4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz0 268435456(%rbp,%r14,8), %tmm4 - -// CHECK: t2rpntlvwz0 291(%r8,%rax,4), %tmm2 -// CHECK: encoding: [0xc4,0xc2,0x78,0x6e,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz0 291(%r8,%rax,4), %tmm2 - -// CHECK: t2rpntlvwz0 -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0xc4,0xe2,0x78,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz0 -32(,%rbp,2), %tmm2 - -// CHECK: t2rpntlvwz0t1 268435456(%rbp,%r14,8), %tmm4 -// CHECK: encoding: [0xc4,0xa2,0x78,0x6f,0xa4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz0t1 268435456(%rbp,%r14,8), %tmm5 - -// CHECK: t2rpntlvwz0t1 291(%r8,%rax,4), %tmm2 -// CHECK: encoding: [0xc4,0xc2,0x78,0x6f,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz0t1 291(%r8,%rax,4), %tmm2 - -// CHECK: t2rpntlvwz0t1 -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0xc4,0xe2,0x78,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz0t1 -32(,%rbp,2), %tmm2 - -// CHECK: t2rpntlvwz1 268435456(%rbp,%r14,8), %tmm4 -// CHECK: encoding: [0xc4,0xa2,0x79,0x6e,0xa4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz1 268435456(%rbp,%r14,8), %tmm5 - -// CHECK: t2rpntlvwz1 291(%r8,%rax,4), %tmm2 -// CHECK: encoding: [0xc4,0xc2,0x79,0x6e,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz1 291(%r8,%rax,4), %tmm2 - -// CHECK: t2rpntlvwz1 -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz1 -32(,%rbp,2), %tmm2 - -// CHECK: t2rpntlvwz1t1 268435456(%rbp,%r14,8), %tmm2 -// CHECK: encoding: [0xc4,0xa2,0x79,0x6f,0x94,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz1t1 268435456(%rbp,%r14,8), %tmm3 - -// CHECK: t2rpntlvwz1t1 291(%r8,%rax,4), %tmm2 -// CHECK: encoding: [0xc4,0xc2,0x79,0x6f,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz1t1 291(%r8,%rax,4), %tmm2 - -// CHECK: t2rpntlvwz1t1 -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz1t1 -32(,%rbp,2), %tmm2 - -// CHECK: t2rpntlvwz0 268435456(%r16,%r14,8), %tmm4 -// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz0 268435456(%r16,%r14,8), %tmm4 - -// CHECK: t2rpntlvwz0 291(%r8,%r17,4), %tmm2 -// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz0 291(%r8,%r17,4), %tmm2 - -// CHECK: {evex} t2rpntlvwz0 -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz0 -32(,%rbp,2), %tmm2 - -// CHECK: t2rpntlvwz0t1 268435456(%r16,%r14,8), %tmm4 -// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz0t1 268435456(%r16,%r14,8), %tmm4 - -// CHECK: t2rpntlvwz0t1 291(%r8,%r17,4), %tmm2 -// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz0t1 291(%r8,%r17,4), %tmm2 - -// CHECK: {evex} t2rpntlvwz0t1 -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz0t1 -32(,%rbp,2), %tmm2 - -// CHECK: t2rpntlvwz1 268435456(%r16,%r14,8), %tmm4 -// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz1 268435456(%r16,%r14,8), %tmm4 - -// CHECK: t2rpntlvwz1 291(%r8,%r17,4), %tmm2 -// CHECK: encoding: 
[0x62,0xd2,0x79,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz1 291(%r8,%r17,4), %tmm2 - -// CHECK: {evex} t2rpntlvwz1 -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz1 -32(,%rbp,2), %tmm2 - -// CHECK: t2rpntlvwz1t1 268435456(%r16,%r14,8), %tmm4 -// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz1t1 268435456(%r16,%r14,8), %tmm4 - -// CHECK: t2rpntlvwz1t1 291(%r8,%r17,4), %tmm2 -// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz1t1 291(%r8,%r17,4), %tmm2 - -// CHECK: {evex} t2rpntlvwz1t1 -32(,%rbp,2), %tmm2 -// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz1t1 -32(,%rbp,2), %tmm2 - -// CHECK: ttransposed %tmm1, %tmm5 -// CHECK: encoding: [0xc4,0xe2,0x7a,0x5f,0xe9] - ttransposed %tmm1, %tmm5 - -// CHECK: ttransposed %tmm2, %tmm3 -// CHECK: encoding: [0xc4,0xe2,0x7a,0x5f,0xda] - ttransposed %tmm2, %tmm3 - -// CHECK: ttdpbf16ps %tmm1, %tmm2, %tmm5 -// CHECK: encoding: [0xc4,0xe2,0x72,0x6c,0xea] - ttdpbf16ps %tmm1, %tmm2, %tmm5 - -// CHECK: ttdpbf16ps %tmm1, %tmm2, %tmm3 -// CHECK: encoding: [0xc4,0xe2,0x72,0x6c,0xda] - ttdpbf16ps %tmm1, %tmm2, %tmm3 - -// CHECK: ttdpfp16ps %tmm3, %tmm4, %tmm5 -// CHECK: encoding: [0xc4,0xe2,0x63,0x6c,0xec] - ttdpfp16ps %tmm3, %tmm4, %tmm5 - -// CHECK: ttdpfp16ps %tmm1, %tmm2, %tmm3 -// CHECK: encoding: [0xc4,0xe2,0x73,0x6c,0xda] - ttdpfp16ps %tmm1, %tmm2, %tmm3 - -// CHECK: ttcmmimfp16ps %tmm4, %tmm5, %tmm6 -// CHECK: encoding: [0xc4,0xe2,0x5b,0x6b,0xf5] - ttcmmimfp16ps %tmm4, %tmm5, %tmm6 - -// CHECK: ttcmmimfp16ps %tmm1, %tmm2, %tmm3 -// CHECK: encoding: [0xc4,0xe2,0x73,0x6b,0xda] - ttcmmimfp16ps %tmm1, %tmm2, %tmm3 - -// CHECK: ttcmmrlfp16ps %tmm4, %tmm5, %tmm6 -// CHECK: encoding: [0xc4,0xe2,0x5a,0x6b,0xf5] - ttcmmrlfp16ps %tmm4, %tmm5, %tmm6 - -// CHECK: ttcmmrlfp16ps %tmm1, %tmm2, %tmm3 -// CHECK: encoding: [0xc4,0xe2,0x72,0x6b,0xda] - ttcmmrlfp16ps %tmm1, %tmm2, %tmm3 - -// CHECK: tconjtcmmimfp16ps %tmm4, %tmm5, %tmm6 -// CHECK: encoding: [0xc4,0xe2,0x58,0x6b,0xf5] - tconjtcmmimfp16ps %tmm4, %tmm5, %tmm6 - -// CHECK: tconjtcmmimfp16ps %tmm1, %tmm2, %tmm3 -// CHECK: encoding: [0xc4,0xe2,0x70,0x6b,0xda] - tconjtcmmimfp16ps %tmm1, %tmm2, %tmm3 - -// CHECK: tconjtfp16 %tmm5, %tmm6 -// CHECK: encoding: [0xc4,0xe2,0x79,0x6b,0xf5] - tconjtfp16 %tmm5, %tmm6 - -// CHECK: tconjtfp16 %tmm2, %tmm3 -// CHECK: encoding: [0xc4,0xe2,0x79,0x6b,0xda] - tconjtfp16 %tmm2, %tmm3 diff --git a/llvm/test/MC/X86/amx-transpose-intel.s b/llvm/test/MC/X86/amx-transpose-intel.s deleted file mode 100644 index 0d2c22f..0000000 --- a/llvm/test/MC/X86/amx-transpose-intel.s +++ /dev/null @@ -1,153 +0,0 @@ -// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s - -// CHECK: t2rpntlvwz0 tmm6, [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0xc4,0xa2,0x78,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz0 tmm6, [rbp + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz0 tmm2, [r8 + 4*rax + 291] -// CHECK: encoding: [0xc4,0xc2,0x78,0x6e,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz0 tmm2, [r8 + 4*rax + 291] - -// CHECK: t2rpntlvwz0 tmm2, [2*rbp - 32] -// CHECK: encoding: [0xc4,0xe2,0x78,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz0 tmm2, [2*rbp - 32] - -// CHECK: t2rpntlvwz0t1 tmm6, [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0xc4,0xa2,0x78,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz0t1 tmm7, [rbp + 8*r14 + 268435456] - 
-// CHECK: t2rpntlvwz0t1 tmm2, [r8 + 4*rax + 291] -// CHECK: encoding: [0xc4,0xc2,0x78,0x6f,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz0t1 tmm2, [r8 + 4*rax + 291] - -// CHECK: t2rpntlvwz0t1 tmm2, [2*rbp - 32] -// CHECK: encoding: [0xc4,0xe2,0x78,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz0t1 tmm2, [2*rbp - 32] - -// CHECK: t2rpntlvwz1 tmm0, [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0xc4,0xa2,0x79,0x6e,0x84,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz1 tmm1, [rbp + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz1 tmm2, [r8 + 4*rax + 291] -// CHECK: encoding: [0xc4,0xc2,0x79,0x6e,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz1 tmm2, [r8 + 4*rax + 291] - -// CHECK: t2rpntlvwz1 tmm2, [2*rbp - 32] -// CHECK: encoding: [0xc4,0xe2,0x79,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz1 tmm2, [2*rbp - 32] - -// CHECK: t2rpntlvwz1t1 tmm6, [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0xc4,0xa2,0x79,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10] - t2rpntlvwz1t1 tmm6, [rbp + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz1t1 tmm2, [r8 + 4*rax + 291] -// CHECK: encoding: [0xc4,0xc2,0x79,0x6f,0x94,0x80,0x23,0x01,0x00,0x00] - t2rpntlvwz1t1 tmm2, [r8 + 4*rax + 291] - -// CHECK: t2rpntlvwz1t1 tmm2, [2*rbp - 32] -// CHECK: encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] - t2rpntlvwz1t1 tmm2, [2*rbp - 32] - -// CHECK: t2rpntlvwz0 tmm4, [r16 + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz0 tmm4, [r16 + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz0 tmm2, [r8 + 4*r17 + 291] -// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz0 tmm2, [r8 + 4*r17 + 291] - -// CHECK: {evex} t2rpntlvwz0 tmm2, [2*rbp - 32] -// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz0 tmm2, [2*rbp - 32] - -// CHECK: t2rpntlvwz0t1 tmm4, [r16 + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz0t1 tmm4, [r16 + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz0t1 tmm2, [r8 + 4*r17 + 291] -// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz0t1 tmm2, [r8 + 4*r17 + 291] - -// CHECK: {evex} t2rpntlvwz0t1 tmm2, [2*rbp - 32] -// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz0t1 tmm2, [2*rbp - 32] - -// CHECK: t2rpntlvwz1 tmm4, [r16 + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz1 tmm4, [r16 + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz1 tmm2, [r8 + 4*r17 + 291] -// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz1 tmm2, [r8 + 4*r17 + 291] - -// CHECK: {evex} t2rpntlvwz1 tmm2, [2*rbp - 32] -// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz1 tmm2, [2*rbp - 32] - -// CHECK: t2rpntlvwz1t1 tmm4, [r16 + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10] - t2rpntlvwz1t1 tmm4, [r16 + 8*r14 + 268435456] - -// CHECK: t2rpntlvwz1t1 tmm2, [r8 + 4*r17 + 291] -// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00] - t2rpntlvwz1t1 tmm2, [r8 + 4*r17 + 291] - -// CHECK: {evex} t2rpntlvwz1t1 tmm2, [2*rbp - 32] -// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff] - {evex} t2rpntlvwz1t1 tmm2, [2*rbp - 32] - -// CHECK: ttransposed tmm5, tmm1 -// CHECK: encoding: [0xc4,0xe2,0x7a,0x5f,0xe9] - ttransposed tmm5, tmm1 - -// CHECK: ttransposed tmm3, tmm2 -// CHECK: 
encoding: [0xc4,0xe2,0x7a,0x5f,0xda] - ttransposed tmm3, tmm2 - -// CHECK: ttdpbf16ps tmm5, tmm0, tmm4 -// CHECK: encoding: [0xc4,0xe2,0x5a,0x6c,0xe8] - ttdpbf16ps tmm5, tmm0, tmm4 - -// CHECK: ttdpbf16ps tmm3, tmm2, tmm1 -// CHECK: encoding: [0xc4,0xe2,0x72,0x6c,0xda] - ttdpbf16ps tmm3, tmm2, tmm1 - -// CHECK: ttdpfp16ps tmm1, tmm0, tmm4 -// CHECK: encoding: [0xc4,0xe2,0x5b,0x6c,0xc8] - ttdpfp16ps tmm1, tmm0, tmm4 - -// CHECK: ttdpfp16ps tmm3, tmm2, tmm1 -// CHECK: encoding: [0xc4,0xe2,0x73,0x6c,0xda] - ttdpfp16ps tmm3, tmm2, tmm1 - -// CHECK: ttcmmimfp16ps tmm6, tmm5, tmm4 -// CHECK: encoding: [0xc4,0xe2,0x5b,0x6b,0xf5] - ttcmmimfp16ps tmm6, tmm5, tmm4 - -// CHECK: ttcmmimfp16ps tmm3, tmm2, tmm1 -// CHECK: encoding: [0xc4,0xe2,0x73,0x6b,0xda] - ttcmmimfp16ps tmm3, tmm2, tmm1 - -// CHECK: ttcmmrlfp16ps tmm6, tmm5, tmm4 -// CHECK: encoding: [0xc4,0xe2,0x5a,0x6b,0xf5] - ttcmmrlfp16ps tmm6, tmm5, tmm4 - -// CHECK: ttcmmrlfp16ps tmm3, tmm2, tmm1 -// CHECK: encoding: [0xc4,0xe2,0x72,0x6b,0xda] - ttcmmrlfp16ps tmm3, tmm2, tmm1 - -// CHECK: tconjtcmmimfp16ps tmm6, tmm5, tmm4 -// CHECK: encoding: [0xc4,0xe2,0x58,0x6b,0xf5] - tconjtcmmimfp16ps tmm6, tmm5, tmm4 - -// CHECK: tconjtcmmimfp16ps tmm3, tmm2, tmm1 -// CHECK: encoding: [0xc4,0xe2,0x70,0x6b,0xda] - tconjtcmmimfp16ps tmm3, tmm2, tmm1 - -// CHECK: tconjtfp16 tmm6, tmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x6b,0xf5] - tconjtfp16 tmm6, tmm5 - -// CHECK: tconjtfp16 tmm3, tmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x6b,0xda] - tconjtfp16 tmm3, tmm2 diff --git a/llvm/test/TableGen/x86-instr-mapping.inc b/llvm/test/TableGen/x86-instr-mapping.inc index f621979..6d2873e 100644 --- a/llvm/test/TableGen/x86-instr-mapping.inc +++ b/llvm/test/TableGen/x86-instr-mapping.inc @@ -167,14 +167,6 @@ static const X86TableEntry X86CompressEVEXTable[] = { { X86::SHRX64rm_EVEX, X86::SHRX64rm }, { X86::SHRX64rr_EVEX, X86::SHRX64rr }, { X86::STTILECFG_EVEX, X86::STTILECFG }, - { X86::T2RPNTLVWZ0RST1_EVEX, X86::T2RPNTLVWZ0RST1 }, - { X86::T2RPNTLVWZ0RS_EVEX, X86::T2RPNTLVWZ0RS }, - { X86::T2RPNTLVWZ0T1_EVEX, X86::T2RPNTLVWZ0T1 }, - { X86::T2RPNTLVWZ0_EVEX, X86::T2RPNTLVWZ0 }, - { X86::T2RPNTLVWZ1RST1_EVEX, X86::T2RPNTLVWZ1RST1 }, - { X86::T2RPNTLVWZ1RS_EVEX, X86::T2RPNTLVWZ1RS }, - { X86::T2RPNTLVWZ1T1_EVEX, X86::T2RPNTLVWZ1T1 }, - { X86::T2RPNTLVWZ1_EVEX, X86::T2RPNTLVWZ1 }, { X86::TILELOADDRST1_EVEX, X86::TILELOADDRST1 }, { X86::TILELOADDRS_EVEX, X86::TILELOADDRS }, { X86::TILELOADDT1_EVEX, X86::TILELOADDT1 }, diff --git a/llvm/test/Transforms/InstCombine/vec_extract_var_elt-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_extract_var_elt-inseltpoison.ll deleted file mode 100644 index 9fcac80..0000000 --- a/llvm/test/Transforms/InstCombine/vec_extract_var_elt-inseltpoison.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: opt < %s -passes=instcombine -S | FileCheck %s - -define void @test (float %b, ptr %p) { -; CHECK: extractelement -; CHECK: fptosi - %1 = load <8 x float> , ptr %p - %2 = bitcast <8 x float> %1 to <8 x i32> - %3 = bitcast <8 x i32> %2 to <8 x float> - %a = fptosi <8 x float> %3 to <8 x i32> - %4 = fptosi float %b to i32 - %5 = add i32 %4, -2 - %6 = extractelement <8 x i32> %a, i32 %5 - %7 = insertelement <8 x i32> poison, i32 %6, i32 7 - %8 = sitofp <8 x i32> %7 to <8 x float> - store <8 x float> %8, ptr %p - ret void -} - -; PR18600 -define i32 @test2(i32 %i) { - %e = extractelement <4 x i32> bitcast (<2 x i64> <i64 1, i64 2> to <4 x i32>), i32 %i - ret i32 %e - -; CHECK-LABEL: @test2 -; CHECK: extractelement -} diff --git 
a/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll index 32bf4da..205b4b8 100644 --- a/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll +++ b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll @@ -1,26 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s -define void @test (float %b, ptr %p) { -; CHECK: extractelement -; CHECK: fptosi - %1 = load <8 x float> , ptr %p +define void @test_poison(float %b, ptr %p) { +; CHECK-LABEL: define void @test_poison( +; CHECK-SAME: float [[B:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[P]], align 32 +; CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[B]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -2 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fptosi float [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 7 +; CHECK-NEXT: [[TMP7:%.*]] = sitofp <8 x i32> [[TMP6]] to <8 x float> +; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[P]], align 32 +; CHECK-NEXT: ret void +; + %1 = load <8 x float>, ptr %p %2 = bitcast <8 x float> %1 to <8 x i32> %3 = bitcast <8 x i32> %2 to <8 x float> %a = fptosi <8 x float> %3 to <8 x i32> %4 = fptosi float %b to i32 %5 = add i32 %4, -2 %6 = extractelement <8 x i32> %a, i32 %5 - %7 = insertelement <8 x i32> undef, i32 %6, i32 7 + %7 = insertelement <8 x i32> poison, i32 %6, i32 7 %8 = sitofp <8 x i32> %7 to <8 x float> store <8 x float> %8, ptr %p - ret void + ret void } ; PR18600 -define i32 @test2(i32 %i) { +define i32 @test_bitcast(i32 %i) { +; CHECK-LABEL: define i32 @test_bitcast( +; CHECK-SAME: i32 [[I:%.*]]) { +; CHECK-NEXT: [[E:%.*]] = extractelement <4 x i32> <i32 1, i32 0, i32 2, i32 0>, i32 [[I]] +; CHECK-NEXT: ret i32 [[E]] +; %e = extractelement <4 x i32> bitcast (<2 x i64> <i64 1, i64 2> to <4 x i32>), i32 %i ret i32 %e +} + +declare void @use(i32) -; CHECK-LABEL: @test2 -; CHECK: extractelement +define void @test_loop(<4 x float> %in) { +; CHECK-LABEL: define void @test_loop( +; CHECK-SAME: <4 x float> [[IN:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[R:%.*]] = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> [[IN]], i32 9) +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LATCH:.*]] ] +; CHECK-NEXT: [[COND:%.*]] = icmp samesign ult i32 [[I]], 4 +; CHECK-NEXT: br i1 [[COND]], label %[[BODY:.*]], label %[[DONE:.*]] +; CHECK: [[BODY]]: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[R]], i32 [[I]] +; CHECK-NEXT: [[ELEM:%.*]] = fptosi float [[TMP0]] to i32 +; CHECK-NEXT: call void @use(i32 [[ELEM]]) +; CHECK-NEXT: br label %[[LATCH]] +; CHECK: [[LATCH]]: +; CHECK-NEXT: [[NEXT]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: br label %[[LOOP]] +; CHECK: [[DONE]]: +; CHECK-NEXT: ret void +; +entry: + %r = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %in, i32 9) + %vi = fptosi <4 x float> %r to <4 x i32> + br label %loop +loop: + %i = phi i32 [ 0, %entry ], [ %next, %latch ] + %cond = icmp ult i32 %i, 4 + br i1 %cond, label %body, label %done +body: + %elem = extractelement <4 x i32> %vi, i32 %i + call void @use(i32 %elem) + br label %latch +latch: + %next = add i32 %i, 1 + br label %loop +done: + ret void } diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll 
b/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll index 77a7f0d..479b3f8 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll @@ -12,8 +12,7 @@ define i32 @add_0() { define i32 @add_0_scalable_vector() { ; CHECK-LABEL: @add_0_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> zeroinitializer) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 0 ; %x = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> zeroinitializer) ret i32 %x @@ -89,8 +88,7 @@ define i32 @add_poison() { define i32 @add_poison_scalable_vector() { ; CHECK-LABEL: @add_poison_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> poison) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> poison) ret i32 %x @@ -123,8 +121,7 @@ define i32 @mul_0() { define i32 @mul_0_scalable_vector() { ; CHECK-LABEL: @mul_0_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> zeroinitializer) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 0 ; %x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> zeroinitializer) ret i32 %x @@ -140,13 +137,29 @@ define i32 @mul_1() { define i32 @mul_1_scalable_vector() { ; CHECK-LABEL: @mul_1_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> splat (i32 1)) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 1 ; %x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> splat (i32 1)) ret i32 %x } +define i32 @mul_2() { +; CHECK-LABEL: @mul_2( +; CHECK-NEXT: ret i32 256 +; + %x = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>) + ret i32 %x +} + +define i32 @mul_2_scalable_vector() { +; CHECK-LABEL: @mul_2_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> splat (i32 2)) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> splat (i32 2)) + ret i32 %x +} + define i32 @mul_inc() { ; CHECK-LABEL: @mul_inc( ; CHECK-NEXT: ret i32 40320 @@ -200,8 +213,7 @@ define i32 @mul_poison() { define i32 @mul_poison_scalable_vector() { ; CHECK-LABEL: @mul_poison_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> poison) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> poison) ret i32 %x @@ -225,8 +237,7 @@ define i32 @and_0() { define i32 @and_0_scalable_vector() { ; CHECK-LABEL: @and_0_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> zeroinitializer) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 0 ; %x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> zeroinitializer) ret i32 %x @@ -242,8 +253,7 @@ define i32 @and_1() { define i32 @and_1_scalable_vector() { ; CHECK-LABEL: @and_1_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> splat (i32 1)) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 1 ; %x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> splat (i32 1)) ret i32 %x @@ -302,8 +312,7 @@ define i32 @and_poison() { define i32 @and_poison_scalable_vector() { ; CHECK-LABEL: @and_poison_scalable_vector( -; 
CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> poison) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> poison) ret i32 %x @@ -327,8 +336,7 @@ define i32 @or_0() { define i32 @or_0_scalable_vector() { ; CHECK-LABEL: @or_0_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> zeroinitializer) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 0 ; %x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> zeroinitializer) ret i32 %x @@ -344,8 +352,7 @@ define i32 @or_1() { define i32 @or_1_scalable_vector() { ; CHECK-LABEL: @or_1_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> splat (i32 1)) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 1 ; %x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> splat (i32 1)) ret i32 %x @@ -404,8 +411,7 @@ define i32 @or_poison() { define i32 @or_poison_scalable_vector() { ; CHECK-LABEL: @or_poison_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> poison) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> poison) ret i32 %x @@ -429,8 +435,7 @@ define i32 @xor_0() { define i32 @xor_0_scalable_vector() { ; CHECK-LABEL: @xor_0_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> zeroinitializer) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 0 ; %x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> zeroinitializer) ret i32 %x @@ -446,13 +451,21 @@ define i32 @xor_1() { define i32 @xor_1_scalable_vector() { ; CHECK-LABEL: @xor_1_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> splat (i32 1)) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 0 ; %x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> splat(i32 1)) ret i32 %x } +define i32 @xor_1_scalable_vector_lane_count_not_known_even() { +; CHECK-LABEL: @xor_1_scalable_vector_lane_count_not_known_even( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> splat (i32 1)) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 1 x i32> splat(i32 1)) + ret i32 %x +} + define i32 @xor_inc() { ; CHECK-LABEL: @xor_inc( ; CHECK-NEXT: ret i32 10 @@ -506,8 +519,7 @@ define i32 @xor_poison() { define i32 @xor_poison_scalable_vector() { ; CHECK-LABEL: @xor_poison_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> poison) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> poison) ret i32 %x @@ -531,8 +543,7 @@ define i32 @smin_0() { define i32 @smin_0_scalable_vector() { ; CHECK-LABEL: @smin_0_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> zeroinitializer) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 0 ; %x = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> zeroinitializer) ret i32 %x @@ -548,8 +559,7 @@ define i32 @smin_1() { define i32 @smin_1_scalable_vector() { ; CHECK-LABEL: @smin_1_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> splat (i32 1)) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 1 ; %x = 
call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> splat(i32 1)) ret i32 %x @@ -608,8 +618,7 @@ define i32 @smin_poison() { define i32 @smin_poison_scalable_vector() { ; CHECK-LABEL: @smin_poison_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> poison) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> poison) ret i32 %x @@ -633,8 +642,7 @@ define i32 @smax_0() { define i32 @smax_0_scalable_vector() { ; CHECK-LABEL: @smax_0_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> zeroinitializer) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 0 ; %x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> zeroinitializer) ret i32 %x @@ -650,8 +658,7 @@ define i32 @smax_1() { define i32 @smax_1_scalable_vector() { ; CHECK-LABEL: @smax_1_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> splat (i32 1)) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 1 ; %x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> splat(i32 1)) ret i32 %x @@ -710,8 +717,7 @@ define i32 @smax_poison() { define i32 @smax_poison_scalable_vector() { ; CHECK-LABEL: @smax_poison_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> poison) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> poison) ret i32 %x @@ -735,8 +741,7 @@ define i32 @umin_0() { define i32 @umin_0_scalable_vector() { ; CHECK-LABEL: @umin_0_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> zeroinitializer) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 0 ; %x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> zeroinitializer) ret i32 %x @@ -752,8 +757,7 @@ define i32 @umin_1() { define i32 @umin_1_scalable_vector() { ; CHECK-LABEL: @umin_1_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> splat (i32 1)) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 1 ; %x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> splat (i32 1)) ret i32 %x @@ -812,8 +816,7 @@ define i32 @umin_poison() { define i32 @umin_poison_scalable_vector() { ; CHECK-LABEL: @umin_poison_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> poison) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> poison) ret i32 %x @@ -837,8 +840,7 @@ define i32 @umax_0() { define i32 @umax_0_scalable_vector() { ; CHECK-LABEL: @umax_0_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> zeroinitializer) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 0 ; %x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> zeroinitializer) ret i32 %x @@ -854,8 +856,7 @@ define i32 @umax_1() { define i32 @umax_1_scalable_vector() { ; CHECK-LABEL: @umax_1_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> splat (i32 1)) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 1 ; %x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> splat(i32 1)) ret i32 %x @@ -914,8 +915,7 @@ define i32 @umax_poison() { define i32 
@umax_poison_scalable_vector() { ; CHECK-LABEL: @umax_poison_scalable_vector( -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> poison) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> poison) ret i32 %x diff --git a/llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel.ll index 1339afe..1339afe 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll +++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel.ll diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll new file mode 100644 index 0000000..96b31d8 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll @@ -0,0 +1,160 @@ +; Test branch weight metadata, estimated trip count metadata, and block +; frequencies after loop unrolling with an epilogue. + +; ------------------------------------------------------------------------------ +; Define substitutions. +; +; Check original loop body frequency. +; DEFINE: %{bf-fc} = opt %s -S -passes='print<block-freq>' 2>&1 | \ +; DEFINE: FileCheck %s -check-prefixes +; +; Unroll loops and then check block frequency. The -implicit-check-not options +; make sure that no additional labels or @f calls show up. +; DEFINE: %{ur-bf} = opt %s -S -passes='loop-unroll,print<block-freq>' 2>&1 +; DEFINE: %{fc} = FileCheck %s \ +; DEFINE: -implicit-check-not='{{^( *- )?[^ ;]*:}}' \ +; DEFINE: -implicit-check-not='call void @f' -check-prefixes + +; ------------------------------------------------------------------------------ +; Check various interesting unroll count values relative to the original loop's +; estimated trip count of 11 (e.g., minimum and boundary values). +; +; RUN: %{bf-fc} ALL,ORIG +; RUN: %{ur-bf} -unroll-count=2 -unroll-runtime | %{fc} ALL,UR,UR2 +; RUN: %{ur-bf} -unroll-count=4 -unroll-runtime | %{fc} ALL,UR,UR4 +; RUN: %{ur-bf} -unroll-count=10 -unroll-runtime | %{fc} ALL,UR,UR10 +; RUN: %{ur-bf} -unroll-count=11 -unroll-runtime | %{fc} ALL,UR,UR11 +; RUN: %{ur-bf} -unroll-count=12 -unroll-runtime | %{fc} ALL,UR,UR12 + +; ------------------------------------------------------------------------------ +; Check the iteration frequencies, which, when each is multiplied by the number +; of original loop bodies that execute within it, should sum to almost exactly +; the original loop body frequency. 
+; +; ALL-LABEL: block-frequency-info: test +; +; ORIG: - [[ENTRY:.*]]: +; ORIG: - [[DO_BODY:.*]]: float = 11.0, +; ORIG: - [[DO_END:.*]]: +; +; UR: - [[ENTRY:.*]]: +; UR: - [[ENTRY_NEW:.*]]: +; UR2: - [[DO_BODY:.*]]: float = 5.2381, +; UR4: - [[DO_BODY:.*]]: float = 2.3702, +; UR10: - [[DO_BODY:.*]]: float = 0.6902, +; UR11: - [[DO_BODY:.*]]: float = 0.59359, +; UR12: - [[DO_BODY:.*]]: float = 0.5144, +; UR: - [[DO_END_UNR_LCSSA:.*]]: +; UR: - [[DO_BODY_EPIL_PREHEADER:.*]]: +; UR2: - [[DO_BODY_EPIL:.*]]: float = 0.52381, +; UR4: - [[DO_BODY_EPIL:.*]]: float = 1.5193, +; UR10: - [[DO_BODY_EPIL:.*]]: float = 4.098, +; UR11: - [[DO_BODY_EPIL:.*]]: float = 4.4705, +; UR12: - [[DO_BODY_EPIL:.*]]: float = 4.8272, +; UR4: - [[DO_END_EPILOG_LCSSA:.*]]: +; UR10: - [[DO_END_EPILOG_LCSSA:.*]]: +; UR11: - [[DO_END_EPILOG_LCSSA:.*]]: +; UR12: - [[DO_END_EPILOG_LCSSA:.*]]: +; UR: - [[DO_END:.*]]: + +; ------------------------------------------------------------------------------ +; Check the CFGs, including the number of original loop bodies that appear +; within each unrolled iteration. +; +; UR-LABEL: define void @test(i32 %{{.*}}) { +; UR: [[ENTRY]]: +; UR: br i1 %{{.*}}, label %[[DO_BODY_EPIL_PREHEADER]], label %[[ENTRY_NEW]], !prof ![[#PROF_UR_GUARD:]]{{$}} +; UR: [[ENTRY_NEW]]: +; UR: br label %[[DO_BODY]] +; UR: [[DO_BODY]]: +; UR2-COUNT-2: call void @f +; UR4-COUNT-4: call void @f +; UR10-COUNT-10: call void @f +; UR11-COUNT-11: call void @f +; UR12-COUNT-12: call void @f +; UR: br i1 %{{.*}}, label %[[DO_END_UNR_LCSSA]], label %[[DO_BODY]], !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]]{{$}} +; UR: [[DO_END_UNR_LCSSA]]: +; UR: br i1 %{{.*}}, label %[[DO_BODY_EPIL_PREHEADER]], label %[[DO_END:.*]], !prof ![[#PROF_RM_GUARD:]]{{$}} +; UR: [[DO_BODY_EPIL_PREHEADER]]: +; UR: br label %[[DO_BODY_EPIL]] +; UR: [[DO_BODY_EPIL]]: +; UR: call void @f +; UR4: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}} +; UR10: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}} +; UR11: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}} +; UR12: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}} +; UR4: [[DO_END_EPILOG_LCSSA]]: +; UR10: [[DO_END_EPILOG_LCSSA]]: +; UR11: [[DO_END_EPILOG_LCSSA]]: +; UR12: [[DO_END_EPILOG_LCSSA]]: +; UR: br label %[[DO_END]] +; UR: [[DO_END]]: +; UR: ret void + +declare void @f(i32) + +define void @test(i32 %n) { +entry: + br label %do.body + +do.body: + %i = phi i32 [ 0, %entry ], [ %inc, %do.body ] + %inc = add i32 %i, 1 + call void @f(i32 %i) + %c = icmp sge i32 %inc, %n + br i1 %c, label %do.end, label %do.body, !prof !0 + +do.end: + ret void +} + +!0 = !{!"branch_weights", i32 1, i32 10} + +; ------------------------------------------------------------------------------ +; Check branch weight metadata and estimated trip count metadata. 
+; +; UR2: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 195225786, i32 1952257862} +; UR4: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 534047398, i32 1613436250} +; UR10: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1236740947, i32 910742701} +; UR11: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1319535738, i32 827947910} +; UR12: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1394803730, i32 752679918} +; +; UR2: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 372703773, i32 1774779875} +; UR4: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 680723421, i32 1466760227} +; UR10: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1319535738, i32 827947910} +; UR11: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1394803730, i32 752679918} +; UR12: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1463229177, i32 684254471} +; +; UR2: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR4: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR10: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR11: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR12: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; +; UR2: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 5} +; UR4: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 2} +; UR10: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} +; UR11: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} +; UR12: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"} +; +; UR2: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1022611260, i32 1124872388} +; UR4: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1531603292, i32 615880356} +; UR10: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1829762672, i32 317720976} +; UR11: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1846907894, i32 300575754} +; UR12: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1860963812, i32 286519836} +; +; UR4: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1038564635, i32 1108919013} +; UR10: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1656332913, i32 491150735} +; UR11: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1693034047, i32 454449601} +; UR12: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1723419551, i32 424064097} + +; UR4: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; UR10: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR11: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; UR12: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; +; UR4: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3} +; For UR10, llvm.loop.estimated_trip_count is the same for both loops. 
+; UR11: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; UR12: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 11} diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll new file mode 100644 index 0000000..cde9d46 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll @@ -0,0 +1,68 @@ +; Test branch weight metadata, estimated trip count metadata, and block +; frequencies after partial loop unrolling without -unroll-runtime. + +; RUN: opt < %s -S -passes='print<block-freq>' 2>&1 | \ +; RUN: FileCheck -check-prefix=CHECK %s + +; The -implicit-check-not options make sure that no additional labels or calls +; to @f show up. +; RUN: opt < %s -S -passes='loop-unroll,print<block-freq>' \ +; RUN: -unroll-count=4 2>&1 | \ +; RUN: FileCheck %s -check-prefix=CHECK-UR \ +; RUN: -implicit-check-not='{{^( *- )?[^ ;]*:}}' \ +; RUN: -implicit-check-not='call void @f' + +; CHECK: block-frequency-info: test +; CHECK: do.body: float = 10.0, + +; The sum should still be ~10. +; +; CHECK-UR: block-frequency-info: test +; CHECK-UR: - [[ENTRY:.*]]: +; CHECK-UR: - [[DO_BODY:.*]]: float = 2.9078, +; CHECK-UR: - [[DO_BODY_1:.*]]: float = 2.617, +; CHECK-UR: - [[DO_BODY_2:.*]]: float = 2.3553, +; CHECK-UR: - [[DO_BODY_3:.*]]: float = 2.1198, +; CHECK-UR: - [[DO_END:.*]]: + +declare void @f(i32) + +define void @test(i32 %n) { +; CHECK-UR-LABEL: define void @test(i32 %{{.*}}) { +; CHECK-UR: [[ENTRY]]: +; CHECK-UR: br label %[[DO_BODY]] +; CHECK-UR: [[DO_BODY]]: +; CHECK-UR: call void @f +; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_1]], !prof ![[#PROF:]] +; CHECK-UR: [[DO_BODY_1]]: +; CHECK-UR: call void @f +; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_2]], !prof ![[#PROF]] +; CHECK-UR: [[DO_BODY_2]]: +; CHECK-UR: call void @f +; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_3]], !prof ![[#PROF]] +; CHECK-UR: [[DO_BODY_3]]: +; CHECK-UR: call void @f +; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY]], !prof ![[#PROF]], !llvm.loop ![[#LOOP_UR_LATCH:]] +; CHECK-UR: [[DO_END]]: +; CHECK-UR: ret void + +entry: + br label %do.body + +do.body: + %i = phi i32 [ 0, %entry ], [ %inc, %do.body ] + %inc = add i32 %i, 1 + call void @f(i32 %i) + %c = icmp sge i32 %inc, %n + br i1 %c, label %do.end, label %do.body, !prof !0 + +do.end: + ret void +} + +!0 = !{!"branch_weights", i32 1, i32 9} + +; CHECK-UR: ![[#PROF]] = !{!"branch_weights", i32 1, i32 9} +; CHECK-UR: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; CHECK-UR: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3} +; CHECK-UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"} diff --git a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll index 0c52b5a0..0473601 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll @@ -188,7 +188,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 { ; CHECK-NEXT: [[L_1_LCSSA_UNR:%.*]] = phi i32 [ poison, [[OUTER_HEADER]] ], [ [[L_1_LCSSA_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] ; CHECK-NEXT: [[INNER_1_IV_UNR:%.*]] = phi i64 [ [[X]], [[OUTER_HEADER]] ], [ [[INNER_1_IV_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] ; 
CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 7 -; CHECK-NEXT: br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]], !prof [[PROF3]] +; CHECK-NEXT: br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]], !prof [[PROF6:![0-9]+]] ; CHECK: outer.header.new: ; CHECK-NEXT: br label [[INNER_1_HEADER:%.*]] ; CHECK: inner.1.header: @@ -232,7 +232,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 { ; CHECK-NEXT: store i32 [[L_1_7]], ptr [[DST]], align 8 ; CHECK-NEXT: [[INNER_1_IV_NEXT_7]] = add i64 [[INNER_1_IV]], 8 ; CHECK-NEXT: [[CMP_2_7:%.*]] = icmp sgt i64 [[INNER_1_IV_NEXT_6]], 0 -; CHECK-NEXT: br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: outer.middle.unr-lcssa: ; CHECK-NEXT: [[L_1_LCSSA_PH:%.*]] = phi i32 [ [[L_1_7]], [[INNER_1_LATCH_7]] ] ; CHECK-NEXT: br label [[OUTER_MIDDLE]] diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll index 2617199..2f8f98d 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll @@ -2,12 +2,24 @@ ;; Check that the remainder loop is properly assigned a branch weight for its latch branch. ; CHECK-LABEL: @test( -; CHECK-LABEL: for.body: -; CHECK: br i1 [[COND1:%.*]], label %for.end.loopexit.unr-lcssa, label %for.body, !prof ![[#PROF:]], !llvm.loop ![[#LOOP:]] -; CHECK-LABEL: for.body.epil: -; CHECK: br i1 [[COND2:%.*]], label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !prof ![[#PROF2:]], !llvm.loop ![[#LOOP2:]] -; CHECK: ![[#PROF]] = !{!"branch_weights", i32 1, i32 2499} -; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 1, i32 1} +; CHECK-LABEL: entry: +; CHECK: [[FOR_BODY_PREHEADER:.*]]: +; CHECK: br i1 %{{.*}}, label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[FOR_BODY_PREHEADER_NEW:.*]], !prof ![[#PROF_UR_GUARD:]] +; CHECK: [[FOR_BODY_PREHEADER_NEW]]: +; CHECK: br label %for.body +; CHECK: for.body: +; CHECK: %add = add +; CHECK: %add.1 = add +; CHECK: %add.2 = add +; CHECK: %add.3 = add +; CHECK-NOT: %add.4 = add +; CHECK: br i1 %{{.*}}, label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %for.body, !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]] +; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]: +; CHECK: br i1 %{{.*}}, label %[[FOR_BODY_EPIL_PREHEADER]], label %[[FOR_END_LOOPEXIT:.*]], !prof ![[#PROF_RM_GUARD:]] +; CHECK: [[FOR_BODY_EPIL_PREHEADER]]: +; CHECK: br label %[[FOR_BODY_EPIL:.*]] +; CHECK: [[FOR_BODY_EPIL]]: +; CHECK: br i1 {{.*}}, label %[[FOR_BODY_EPIL]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA:.*]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]] define i3 @test(ptr %a, i3 %n) { entry: @@ -31,3 +43,37 @@ for.end: } !0 = !{!"branch_weights", i32 1, i32 9999} + +; Original loop probability: p = 9999/(1+9999) = 0.9999 +; Original estimated trip count: (1+9999)/1 = 10000 +; Unroll count: 4 + +; Probability of >=3 iterations after first: p^3 = 0.99970003 =~ +; 2146839468 / (644180 + 2146839468). +; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 644180, i32 2146839468} + +; Probability of >=4 more iterations: p^4 = 0.99960006 =~ +; 2146624784 / (858864 + 2146624784). 
+; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 858864, i32 2146624784} + +; 10000//4 = 2500 +; CHECK: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; CHECK: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 2500} +; +; CHECK: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"} + +; Probability of 1 to 3 more of 3 more remainder iterations: +; (p-p^4)/(1-p^4) = 0.749962497 =~ 1610532724 / (1610532724 + 536950924). +; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1610532724, i32 536950924} + +; Frequency of first remainder iter: r1 = 1 +; Frequency of second remainder iter: r2 = r1*(p-p^3)/(1-p^3) = 0.666633331 +; Frequency of third remainder iter: r3 = r2*(p-p^2)/(1-p^2) = 0.333299999 +; Solve for loop probability that produces that frequency: f = 1/(1-p') => +; p' = 1-1/f = 1-1/(r1+r2+r3) = 0.499983332 =~ +; 1073706403 / (1073706403 + 1073777245). +; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1073706403, i32 1073777245} + +; 10000%4 = 0 +; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll index 492de06..ec7aba4 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll @@ -295,11 +295,12 @@ exit2.loopexit: ; COMMON-LABEL: {{^}}!0 = ; EPILOG: [[EPILOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11} -; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 1, i32 127} -; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 1, i32 7} -; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 3, i32 1} +; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 326124004, i32 1821359644} +; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 1856428066, i32 291055582} +; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 1597681585, i32 549802063} -; EPILOG: [[EPILOG_LOOP]] = distinct !{[[EPILOG_LOOP]], [[EPILOG_LOOP_1:![0-9]+]]} +; EPILOG: [[EPILOG_LOOP]] = distinct !{[[EPILOG_LOOP]], [[EPILOG_TC:![0-9]+]], [[EPILOG_LOOP_1:![0-9]+]]} +; EPILOG: [[EPILOG_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3} ; EPILOG: [[EPILOG_LOOP_1]] = !{!"llvm.loop.unroll.disable"} ; PROLOG: [[PROLOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11} diff --git a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll index 611ee5f..02f5bf9 100644 --- a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll @@ -3,14 +3,27 @@ @known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16 ; CHECK-LABEL: @bar_prof -; CHECK: loop: -; CHECK: %mul = mul -; CHECK: %mul.1 = mul -; CHECK: %mul.2 = mul -; CHECK: %mul.3 = mul -; CHECK: br i1 %niter.ncmp.7, label %loop.end.unr-lcssa, label %loop, !prof [[PROF0:![0-9]+]] -; CHECK: loop.epil: -; CHECK: br i1 %epil.iter.cmp, label %loop.epil, label %loop.end.epilog-lcssa, !prof [[PROF1:![0-9]+]], !llvm.loop {{![0-9]+}} +; CHECK: entry: +; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]], !prof ![[#PROF_UR_GUARD:]] +; CHECK: [[ENTRY_NEW]]: +; CHECK: br label %loop +; CHECK: loop: +; CHECK: %mul = mul +; CHECK: %mul.1 = mul +; CHECK: %mul.2 = mul +; CHECK: %mul.3 = mul +; CHECK: %mul.4 = mul +; CHECK: 
%mul.5 = mul +; CHECK: %mul.6 = mul +; CHECK: %mul.7 = mul +; CHECK-NOT: %mul.8 = mul +; CHECK: br i1 %{{.*}}, label %[[LOOP_END_UNR_LCSSA:.*]], label %loop, !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]] +; CHECK: [[LOOP_END_UNR_LCSSA]]: +; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL_PREHEADER]], label %loop.end, !prof ![[#PROF_RM_GUARD:]] +; CHECK: [[LOOP_EPIL_PREHEADER]]: +; CHECK: br label %[[LOOP_EPIL:.*]] +; CHECK: [[LOOP_EPIL]]: +; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL]], label %[[LOOP_END_EPILOG_LCSSA:.*]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]] define i32 @bar_prof(ptr noalias nocapture readonly %src, i64 %c) !prof !1 { entry: br label %loop @@ -60,5 +73,38 @@ loop.end: !1 = !{!"function_entry_count", i64 1} !2 = !{!"branch_weights", i32 1, i32 1000} -; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 124} -; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 1} +; Original loop probability: p = 1000/(1+1000) = 0.999000999 +; Original estimated trip count: (1+1000)/1 = 1001 +; Unroll count: 8 + +; Probability of >=7 iterations after first: p^7 = 0.993027916 =~ +; 2132511214 / (14972434 + 2132511214). +; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 14972434, i32 2132511214} + +; Probability of >=8 more iterations: p^8 = 0.99203588 =~ +; 2130380833 / (17102815 + 2130380833). +; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 17102815, i32 2130380833} + +; 1001//8 = 125 +; CHECK: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]]} +; CHECK: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 125} + +; Probability of 1 to 7 more of 7 more remainder iterations: +; (p-p^8)/(1-p^8) = 0.874562282 =~ 1878108210 / (1878108210 + 269375438). +; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1878108210, i32 269375438} + +; Frequency of first remainder iter: r1 = 1 +; Frequency of second remainder iter: r2 = r1*(p-p^7)/(1-p^7) = 0.856714143 +; Frequency of third remainder iter: r3 = r2*(p-p^6)/(1-p^6) = 0.713571429 +; Frequency of fourth remainder iter: r4 = r3*(p-p^5)/(1-p^5) = 0.570571715 +; Frequency of fifth remainder iter: r5 = r4*(p-p^4)/(1-p^4) = 0.427714858 +; Frequency of sixth remainder iter: r6 = r5*(p-p^3)/(1-p^3) = 0.285000715 +; Frequency of seventh remainder iter: r7 = r6*(p-p^2)/(1-p^2) = 0.142429143 +; Solve for loop probability that produces that frequency: f = 1/(1-p') => +; p' = 1-1/f = 1-1/(r1+r2+r3+r4+r5+r6+r7) = 0.749749875 =~ +; 1610075606 / (1610075606 + 537408042). 
+; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1610075606, i32 537408042} + +; Remainder estimated trip count: 1001%8 = 1 +; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index bfee39ea..068f82c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -365,8 +365,8 @@ define void @invalid_legacy_cost(i64 %N, ptr %x) #0 { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = alloca i8, i64 0, align 16 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> [[TMP7]], ptr [[TMP6]], i32 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x ptr> [[TMP8]], ptr [[TMP9]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll index ea01489..0a9494e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll @@ -10,8 +10,8 @@ define void @licm_replicate_call(double %x, ptr %dst) { ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP1:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00) -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll index 157b787..3558957 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll @@ -64,9 +64,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] ; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8 -; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]] -; TFCOMMON-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double 
[[TMP5]], i32 0 -; TFCOMMON-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP5]], i32 1 +; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR2:[0-9]+]] +; TFCOMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0 +; TFCOMMON-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; TFCOMMON-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer ; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; TFCOMMON-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0 @@ -79,7 +79,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1 ; TFCOMMON-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE6]] ; TFCOMMON: pred.store.if1: -; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1 +; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0 ; TFCOMMON-NEXT: store double [[TMP19]], ptr [[P]], align 8 ; TFCOMMON-NEXT: br label [[PRED_STORE_CONTINUE6]] ; TFCOMMON: pred.store.continue2: @@ -105,9 +105,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ] ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8 -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]] -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0 -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP9]], i32 1 +; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR2:[0-9]+]] +; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0 +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer ; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0 @@ -120,7 +120,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] ; TFA_INTERLEAVE: pred.store.if3: -; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1 +; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0 ; TFA_INTERLEAVE-NEXT: store double [[TMP22]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE5]] ; TFA_INTERLEAVE: pred.store.continue4: @@ -134,7 +134,7 @@ define void 
@test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] ; TFA_INTERLEAVE: pred.store.if7: -; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1 +; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0 ; TFA_INTERLEAVE-NEXT: store double [[TMP34]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE9]] ; TFA_INTERLEAVE: pred.store.continue8: diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll index 03087bb..4590dfc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll @@ -199,10 +199,8 @@ define float @uniform_load_replicating_select(ptr %A, ptr %B, i64 %1) { ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 7 ; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[A]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = fcmp ogt float [[TMP6]], 0.000000e+00 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i1> [[TMP8]], i1 [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> [[TMP9]], i1 [[TMP10]], i32 2 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i1> [[TMP13]], i1 [[TMP10]], i32 3 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-metadata.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-metadata.ll new file mode 100644 index 0000000..857b913 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-metadata.ll @@ -0,0 +1,100 @@ +; REQUIRES: asserts + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -disable-output %s 2>&1 | FileCheck %s + +define void @test_widen_metadata(ptr noalias %A, ptr noalias %B, i32 %n) { +; CHECK-LABEL: Checking a loop in 'test_widen_metadata' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK: <x1> vector loop: { +; CHECK: vector.body: +; CHECK: WIDEN ir<%lv> = load vp<{{.*}}> +; CHECK: WIDEN-CAST ir<%conv> = sitofp ir<%lv> to float +; CHECK: WIDEN ir<%mul> = fmul ir<%conv>, ir<2.000000e+00> +; CHECK: WIDEN-CAST ir<%conv.back> = fptosi ir<%mul> to i32 +; CHECK: WIDEN store vp<{{.*}}>, ir<%conv.back> +; +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + %lv = load i32, ptr %gep.A, align 4, !tbaa !0, !range !6 + %conv = sitofp i32 %lv to float, !fpmath !5 + %mul = fmul float %conv, 2.0, !fpmath !5 + %conv.back = fptosi float %mul to i32 + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + store i32 %conv.back, ptr %gep.B, align 4, !tbaa !0 + %i.next = add i32 %i, 1 + %cond = icmp eq i32 
%i.next, %n + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +declare float @llvm.sqrt.f32(float) + +define void @test_intrinsic_with_metadata(ptr noalias %A, ptr noalias %B, i32 %n) { +; CHECK-LABEL: Checking a loop in 'test_intrinsic_with_metadata' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK: <x1> vector loop: { +; CHECK: vector.body: +; CHECK: WIDEN ir<%lv> = load vp<{{.*}}> +; CHECK: WIDEN-INTRINSIC ir<%sqrt> = call llvm.sqrt(ir<%lv>) +; CHECK: WIDEN store vp<{{.*}}>, ir<%sqrt> +; +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + %gep.A = getelementptr inbounds float, ptr %A, i32 %i + %lv = load float, ptr %gep.A, align 4, !tbaa !0 + %sqrt = call float @llvm.sqrt.f32(float %lv), !fpmath !5 + %gep.B = getelementptr inbounds float, ptr %B, i32 %i + store float %sqrt, ptr %gep.B, align 4, !tbaa !0 + %i.next = add i32 %i, 1 + %cond = icmp eq i32 %i.next, %n + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +define void @test_widen_with_multiple_metadata(ptr noalias %A, ptr noalias %B, i32 %n) { +; CHECK-LABEL: Checking a loop in 'test_widen_with_multiple_metadata' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK: <x1> vector loop: { +; CHECK: vector.body: +; CHECK: WIDEN ir<%lv> = load vp<{{.*}}> +; CHECK: WIDEN-CAST ir<%conv> = sitofp ir<%lv> to float +; CHECK: WIDEN ir<%mul> = fmul ir<%conv>, ir<2.000000e+00> +; CHECK: WIDEN-CAST ir<%conv.back> = fptosi ir<%mul> to i32 +; CHECK: WIDEN store vp<{{.*}}>, ir<%conv.back> +; +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + %lv = load i32, ptr %gep.A, align 4, !tbaa !0, !range !6 + %conv = sitofp i32 %lv to float + %mul = fmul float %conv, 2.0 + %conv.back = fptosi float %mul to i32 + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + store i32 %conv.back, ptr %gep.B, align 4, !tbaa !0 + %i.next = add i32 %i, 1 + %cond = icmp eq i32 %i.next, %n + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +!0 = !{!1, !1, i64 0} +!1 = !{!"float", !2} +!2 = !{!"root"} +!5 = !{float 2.500000e+00} +!6 = !{i32 0, i32 100} diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll index 291ada8..ef678ff 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll @@ -804,9 +804,9 @@ exit: define i32 @print_mulacc_extended_const(ptr %start, ptr %end) { ; CHECK-LABEL: 'print_mulacc_extended_const' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<%0> = VF -; CHECK-NEXT: Live-in vp<%1> = VF * UF -; CHECK-NEXT: Live-in vp<%2> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<%3> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<entry>: @@ -814,107 +814,84 @@ define i32 @print_mulacc_extended_const(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1> -; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1> +; CHECK-NEXT: vp<[[DER_IV:%.+]]> = DERIVED-IV ir<%start> + vp<[[VTC]]> * ir<1> +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1> ; CHECK-NEXT: Successor(s): 
vector loop ; CHECK-EMPTY: ; CHECK-NEXT: <x1> vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9> -; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0> -; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7> -; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep> -; CHECK-NEXT: WIDEN ir<%l> = load vp<%8> -; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul (ir<%l> zext to i32), (ir<63> zext to i32)) -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1> -; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> +; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul (ir<%l> zext to i32), (ir<63> zext to i32)) +; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9> -; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2> +; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> -; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<exit>: -; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ] -; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ] -; CHECK-NEXT: Successor(s): ir-bb<loop> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph) -; CHECK-NEXT: IR %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph) -; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 -; CHECK-NEXT: IR %l.ext = zext i8 %l to i32 -; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63 -; CHECK-NEXT: IR %red.next = add i32 %red, %mul -; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 -; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' { -; CHECK-NEXT: Live-in ir<%1> = original trip-count +entry: + br label %loop + +loop: + %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] + %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] + %l = load i8, ptr %ptr.iv, align 1 + %l.ext = zext i8 %l to i32 + %mul = mul i32 %l.ext, 63 + %red.next = add i32 %red, %mul + %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 + %ec = icmp eq ptr %ptr.iv, %end + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %red.next +} + +define 
i32 @print_mulacc_extended_const_lhs(ptr %start, ptr %end) { +; CHECK-LABEL: 'print_mulacc_extended_const_lhs' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: vp<%3> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<entry>: -; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64 -; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64 -; CHECK-NEXT: IR %0 = add i64 %end1, 1 -; CHECK-NEXT: IR %1 = sub i64 %0, %start2 -; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4> -; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check> -; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph +; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (1 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) +; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4> -; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf> -; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1> -; CHECK-NEXT: Successor(s): vector.body -; CHECK-EMPTY: -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ] -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next> -; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index> -; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep> -; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = zext ir<%l> to i32 -; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<63> -; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>) -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4> -; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec> -; CHECK-NEXT: Successor(s): middle.block, vector.body +; CHECK-NEXT: vp<[[DER_IV:%.+]]> = DERIVED-IV ir<%start> + vp<[[VTC]]> * ir<1> +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1> +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: <x1> vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> +; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = zext ir<%l> to i32 +; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul ir<63>, ir<%l.ext>) +; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next> -; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec> +; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> -; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<exit>: -; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] 
(extra operand: vp<%5> from middle.block) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<scalar.ph>: -; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ] -; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block ], [ ir<0>, ir-bb<entry> ] -; CHECK-NEXT: Successor(s): ir-bb<loop> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>) -; CHECK-NEXT: IR %red = phi i32 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>) -; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 -; CHECK-NEXT: IR %l.ext = zext i8 %l to i32 -; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63 -; CHECK-NEXT: IR %red.next = add i32 %red, %mul -; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 -; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end -; CHECK-NEXT: No successors -; CHECK-NEXT: } entry: br label %loop @@ -923,7 +900,7 @@ loop: %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] %l = load i8, ptr %ptr.iv, align 1 %l.ext = zext i8 %l to i32 - %mul = mul i32 %l.ext, 63 + %mul = mul i32 63, %l.ext %red.next = add i32 %red, %mul %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 %ec = icmp eq ptr %ptr.iv, %end @@ -937,9 +914,9 @@ exit: define i32 @print_mulacc_not_extended_const(ptr %start, ptr %end) { ; CHECK-LABEL: 'print_mulacc_not_extended_const' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<%0> = VF -; CHECK-NEXT: Live-in vp<%1> = VF * UF -; CHECK-NEXT: Live-in vp<%2> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<%3> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<entry>: @@ -947,108 +924,30 @@ define i32 @print_mulacc_not_extended_const(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1> -; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1> +; CHECK-NEXT: vp<[[DER_IV:%.+]]> = DERIVED-IV ir<%start> + vp<[[VTC]]> * ir<1> +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: <x1> vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9> -; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0> -; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7> -; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep> -; CHECK-NEXT: WIDEN ir<%l> = load vp<%8> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> +; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32 -; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul ir<%l.ext>, ir<128>) -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1> -; CHECK-NEXT: 
EMIT branch-on-count vp<%index.next>, vp<%2> +; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul ir<%l.ext>, ir<128>) +; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9> -; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2> +; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX:%.+]]>, vp<[[RDX_NEXT]]> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> -; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<exit>: -; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ] -; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ] -; CHECK-NEXT: Successor(s): ir-bb<loop> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph) -; CHECK-NEXT: IR %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph) -; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 -; CHECK-NEXT: IR %l.ext = sext i8 %l to i32 -; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128 -; CHECK-NEXT: IR %red.next = add i32 %red, %mul -; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 -; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' { -; CHECK-NEXT: Live-in ir<%1> = original trip-count -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<entry>: -; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64 -; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64 -; CHECK-NEXT: IR %0 = add i64 %end1, 1 -; CHECK-NEXT: IR %1 = sub i64 %0, %start2 -; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4> -; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check> -; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph -; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: -; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4> -; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf> -; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1> -; CHECK-NEXT: Successor(s): vector.body -; CHECK-EMPTY: -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ] -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next> -; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index> -; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep> -; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32 -; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128> -; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>) -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4> -; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec> -; CHECK-NEXT: Successor(s): middle.block, vector.body -; CHECK-EMPTY: -; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next> -; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq 
ir<%1>, vp<%n.vec> -; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> -; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<exit>: -; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%5> from middle.block) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<scalar.ph>: -; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ] -; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block ], [ ir<0>, ir-bb<entry> ] -; CHECK-NEXT: Successor(s): ir-bb<loop> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>) -; CHECK-NEXT: IR %red = phi i32 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>) -; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 -; CHECK-NEXT: IR %l.ext = sext i8 %l to i32 -; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128 -; CHECK-NEXT: IR %red.next = add i32 %red, %mul -; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 -; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end -; CHECK-NEXT: No successors -; CHECK-NEXT: } entry: br label %loop @@ -1071,9 +970,9 @@ exit: define i64 @print_ext_mulacc_extended_const(ptr %start, ptr %end) { ; CHECK-LABEL: 'print_ext_mulacc_extended_const' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<%0> = VF -; CHECK-NEXT: Live-in vp<%1> = VF * UF -; CHECK-NEXT: Live-in vp<%2> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<%3> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<entry>: @@ -1081,109 +980,29 @@ define i64 @print_ext_mulacc_extended_const(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1> -; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1> +; CHECK-NEXT: vp<[[DER_IV:%.+]]> = DERIVED-IV ir<%start> + vp<[[VTC]]> * ir<1> +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: <x1> vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9> -; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0> -; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7> -; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep> -; CHECK-NEXT: WIDEN ir<%l> = load vp<%8> -; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul (ir<%l> zext to i64), (ir<63> zext to i64)) -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1> -; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> +; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + 
reduce.add (mul (ir<%l> zext to i64), (ir<63> zext to i64)) +; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9> -; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2> +; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> -; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<exit>: -; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ] -; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ] -; CHECK-NEXT: Successor(s): ir-bb<loop> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph) -; CHECK-NEXT: IR %red = phi i64 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph) -; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 -; CHECK-NEXT: IR %l.ext = zext i8 %l to i32 -; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63 -; CHECK-NEXT: IR %mul.ext = zext i32 %mul to i64 -; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext -; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 -; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' { -; CHECK-NEXT: Live-in ir<%1> = original trip-count -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<entry>: -; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64 -; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64 -; CHECK-NEXT: IR %0 = add i64 %end1, 1 -; CHECK-NEXT: IR %1 = sub i64 %0, %start2 -; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4> -; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check> -; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph -; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: -; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4> -; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf> -; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1> -; CHECK-NEXT: Successor(s): vector.body -; CHECK-EMPTY: -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ] -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next> -; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index> -; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep> -; CHECK-NEXT: WIDEN-CAST vp<%4> = zext ir<%l> to i64 -; CHECK-NEXT: WIDEN ir<%mul> = mul vp<%4>, ir<63> -; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>) -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4> -; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec> -; CHECK-NEXT: Successor(s): middle.block, vector.body -; CHECK-EMPTY: -; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<%6> = compute-reduction-result ir<%red>, ir<%red.next> -; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec> -; CHECK-NEXT: EMIT 
branch-on-cond vp<%cmp.n> -; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<exit>: -; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%6> from middle.block) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<scalar.ph>: -; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ] -; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%6>, middle.block ], [ ir<0>, ir-bb<entry> ] -; CHECK-NEXT: Successor(s): ir-bb<loop> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>) -; CHECK-NEXT: IR %red = phi i64 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>) -; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 -; CHECK-NEXT: IR %l.ext = zext i8 %l to i32 -; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63 -; CHECK-NEXT: IR %mul.ext = zext i32 %mul to i64 -; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext -; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 -; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end -; CHECK-NEXT: No successors -; CHECK-NEXT: } entry: br label %loop @@ -1207,9 +1026,9 @@ exit: define i64 @print_ext_mulacc_not_extended_const(ptr %start, ptr %end) { ; CHECK-LABEL: 'print_ext_mulacc_not_extended_const' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<%0> = VF -; CHECK-NEXT: Live-in vp<%1> = VF * UF -; CHECK-NEXT: Live-in vp<%2> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<%3> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<entry>: @@ -1217,112 +1036,31 @@ define i64 @print_ext_mulacc_not_extended_const(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1> -; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1> +; CHECK-NEXT: vp<[[DER_IV:%.+]]> = DERIVED-IV ir<%start> + vp<[[VTC]]> * ir<1> +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: <x1> vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9> -; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0> -; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7> -; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep> -; CHECK-NEXT: WIDEN ir<%l> = load vp<%8> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> +; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32 ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128> -; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (ir<%mul> sext to i64) -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1> -; 
CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2> +; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (ir<%mul> sext to i64) +; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9> -; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2> -; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> -; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<exit>: -; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ] -; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ] -; CHECK-NEXT: Successor(s): ir-bb<loop> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph) -; CHECK-NEXT: IR %red = phi i64 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph) -; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 -; CHECK-NEXT: IR %l.ext = sext i8 %l to i32 -; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128 -; CHECK-NEXT: IR %mul.ext = sext i32 %mul to i64 -; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext -; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 -; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' { -; CHECK-NEXT: Live-in ir<%1> = original trip-count -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<entry>: -; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64 -; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64 -; CHECK-NEXT: IR %0 = add i64 %end1, 1 -; CHECK-NEXT: IR %1 = sub i64 %0, %start2 -; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4> -; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check> -; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph -; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: -; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4> -; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf> -; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1> -; CHECK-NEXT: Successor(s): vector.body -; CHECK-EMPTY: -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ] -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next> -; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index> -; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep> -; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32 -; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128> -; CHECK-NEXT: WIDEN-CAST ir<%mul.ext> = sext ir<%mul> to i64 -; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul.ext>) -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4> -; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec> -; CHECK-NEXT: Successor(s): middle.block, vector.body -; CHECK-EMPTY: -; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next> -; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec> +; 
CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> -; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<exit>: -; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%5> from middle.block) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<scalar.ph>: -; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ] -; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block ], [ ir<0>, ir-bb<entry> ] -; CHECK-NEXT: Successor(s): ir-bb<loop> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>) -; CHECK-NEXT: IR %red = phi i64 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>) -; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 -; CHECK-NEXT: IR %l.ext = sext i8 %l to i32 -; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128 -; CHECK-NEXT: IR %mul.ext = sext i32 %mul to i64 -; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext -; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 -; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end -; CHECK-NEXT: No successors -; CHECK-NEXT: } entry: br label %loop diff --git a/llvm/test/Transforms/PGOProfile/memprof_diff_inline.ll b/llvm/test/Transforms/PGOProfile/memprof_diff_inline.ll new file mode 100644 index 0000000..5213a07 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/memprof_diff_inline.ll @@ -0,0 +1,118 @@ +;; Tests that the compiler ignores smaller contexts that differ only in the +;; IsInlineFrame bool. These map to the same full context id internally, as we +;; ignore the inline frame status which may differ in feedback compiles. +;; Presumably this happens when profiles collected from different binaries are +;; merged. If we didn't pick the largest we would default them all to noncold. + +;; Avoid failures on big-endian systems that can't read the profile properly +; REQUIRES: x86_64-linux + +;; Generate the profile and the IR. 
+; RUN: split-file %s %t + +;; Generate indexed profile +; RUN: llvm-profdata merge %t/memprof_diff_inline.yaml -o %t.memprofdata + +; RUN: opt < %t/memprof_diff_inline.ll -passes='memprof-use<profile-filename=%t.memprofdata>' -S -memprof-report-hinted-sizes -memprof-print-match-info 2>&1 | FileCheck %s --check-prefixes=MEMPROF + +; MEMPROF: MemProf notcold context with id 10194276560488437434 has total profiled size 200 is matched with 1 frames +; MEMPROF: MemProf cold context with id 16342802530253093571 has total profiled size 10000 is matched with 1 frames + +;--- memprof_diff_inline.yaml +--- +HeapProfileRecords: + - GUID: _Z3foov + AllocSites: + # Small non-cold, full context id 16342802530253093571, should ignore + - Callstack: + - { Function: _Z3foov, LineOffset: 1, Column: 10, IsInlineFrame: false } + - { Function: _Z4foo2v, LineOffset: 1, Column: 10, IsInlineFrame: false } + - { Function: _Z3barv, LineOffset: 1, Column: 10, IsInlineFrame: false } + - { Function: main, LineOffset: 8, Column: 13, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 1 + TotalSize: 10 + TotalLifetime: 0 + TotalLifetimeAccessDensity: 20000 + # Large cold, full context id 16342802530253093571, should keep + - Callstack: + - { Function: _Z3foov, LineOffset: 1, Column: 10, IsInlineFrame: false } + - { Function: _Z4foo2v, LineOffset: 1, Column: 10, IsInlineFrame: true } + - { Function: _Z3barv, LineOffset: 1, Column: 10, IsInlineFrame: false } + - { Function: main, LineOffset: 8, Column: 13, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 1 + TotalSize: 10000 + TotalLifetime: 200000 + TotalLifetimeAccessDensity: 0 + # Small non-cold, full context id 16342802530253093571, should ignore + - Callstack: + - { Function: _Z3foov, LineOffset: 1, Column: 10, IsInlineFrame: false } + - { Function: _Z4foo2v, LineOffset: 1, Column: 10, IsInlineFrame: false } + - { Function: _Z3barv, LineOffset: 1, Column: 10, IsInlineFrame: true } + - { Function: main, LineOffset: 8, Column: 13, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 1 + TotalSize: 100 + TotalLifetime: 0 + TotalLifetimeAccessDensity: 20000 + # Small non-cold, full context id 10194276560488437434 + - Callstack: + - { Function: _Z3foov, LineOffset: 1, Column: 10, IsInlineFrame: false } + - { Function: _Z4foo2v, LineOffset: 1, Column: 10, IsInlineFrame: false } + - { Function: _Z3barv, LineOffset: 1, Column: 10, IsInlineFrame: false } + - { Function: main, LineOffset: 9, Column: 13, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 1 + TotalSize: 200 + TotalLifetime: 0 + TotalLifetimeAccessDensity: 20000 + CallSites: [] +... 
+;--- memprof_diff_inline.ll +; ModuleID = 'memprof_diff_inline.cc' +source_filename = "memprof_diff_inline.cc" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%"struct.std::nothrow_t" = type { i8 } + +@_ZSt7nothrow = external global %"struct.std::nothrow_t", align 1 + +define dso_local noundef ptr @_Z3foov() !dbg !10 { +entry: + ; MEMPROF: call {{.*}} @_Znwm{{.*}} !memprof ![[M1:[0-9]+]], !callsite ![[C1:[0-9]+]] + %call = call noalias noundef align 32 ptr @_Znwm(i64 noundef 32) #6, !dbg !13 + ret ptr %call +} + +declare noundef ptr @_Znwm(i64 noundef) + +attributes #6 = { builtin allocsize(0) } + +; MEMPROF: ![[M1]] = !{![[MIB1:[0-9]+]], ![[MIB2:[0-9]+]]} + +; MEMPROF: ![[MIB1]] = !{![[STACK1:[0-9]+]], !"notcold", ![[CONTEXTSIZE1:[0-9]+]]} +; MEMPROF: ![[STACK1]] = !{i64 2732490490862098848, i64 8467819354083268568, i64 9086428284934609951, i64 2061451396820446691} +;; Full context id 10194276560488437434 == -8252467513221114182 +; MEMPROF: ![[CONTEXTSIZE1]] = !{i64 -8252467513221114182, i64 200} + +; MEMPROF: ![[MIB2]] = !{![[STACK2:[0-9]+]], !"cold", ![[CONTEXTSIZE2:[0-9]+]]} +; MEMPROF: ![[STACK2]] = !{i64 2732490490862098848, i64 8467819354083268568, i64 9086428284934609951, i64 -5747251260480066785} +;; Full context id 16342802530253093571 == -2103941543456458045 +;; We should have kept the large (cold) one. +; MEMPROF: ![[CONTEXTSIZE2]] = !{i64 -2103941543456458045, i64 10000} + +; MEMPROF: ![[C1]] = !{i64 2732490490862098848} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 6cbe6284d1f0a088b5c6482ae27b738f03d82fe7)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "memprof.cc", directory: "/usr/local/google/home/tejohnson/llvm/tmp", checksumkind: CSK_MD5, checksum: "e8c40ebe4b21776b4d60e9632cbc13c2") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12) +!11 = !DISubroutineType(types: !12) +!12 = !{} +!13 = !DILocation(line: 5, column: 10, scope: !10) diff --git a/llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll b/llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll index 2461ca3..ba53c57 100644 --- a/llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll +++ b/llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll @@ -4,24 +4,50 @@ ;; Avoid failures on big-endian systems that can't read the profile properly ; REQUIRES: x86_64-linux -;; TODO: Use text profile inputs once that is available for memprof. -;; # To update the Inputs below, run Inputs/update_memprof_inputs.sh. -;; # To generate below LLVM IR for use in matching. -;; $ clang++ -gmlt -fdebug-info-for-profiling -S %S/Inputs/memprof_loop_unroll_b.cc -emit-llvm +; Generate the profile and the IR. 
+; RUN: split-file %s %t + +;; Generate indexed profile +; RUN: llvm-profdata merge %t/memprof_loop_unroll.yaml -o %t.memprofdata -; RUN: llvm-profdata merge %S/Inputs/memprof_loop_unroll.memprofraw --profiled-binary %S/Inputs/memprof_loop_unroll.exe -o %t.memprofdata ;; Set the minimum lifetime threshold to 0 to ensure that one context is ;; considered cold (the other will be notcold). -; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S -memprof-report-hinted-sizes -memprof-ave-lifetime-cold-threshold=0 2>&1 | FileCheck %s +; RUN: opt < %t/memprof_loop_unroll.ll -passes='memprof-use<profile-filename=%t.memprofdata>' -S -memprof-report-hinted-sizes -memprof-ave-lifetime-cold-threshold=0 2>&1 | FileCheck %s -;; Conservatively annotate as not cold. We get two messages as there are two -;; unrolled copies of the allocation. -; CHECK: MemProf hinting: Total size for full allocation context hash {{.*}} and indistinguishable alloc type notcold: 4 -; CHECK: MemProf hinting: Total size for full allocation context hash {{.*}} and indistinguishable alloc type notcold: 4 +;; Conservatively annotate as not cold. +; CHECK: MemProf hinting: Total size for full allocation context hash {{.*}} and single alloc type notcold: 4 ; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]] ; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" } ; CHECK-NOT: stackIds: () +;--- memprof_loop_unroll.yaml +--- +HeapProfileRecords: + - GUID: 0x7f8d88fcc70a347b + AllocSites: + - Callstack: + - { Function: 0x7f8d88fcc70a347b, LineOffset: 2, Column: 16, IsInlineFrame: false } + - { Function: 0xdb956436e78dd5fa, LineOffset: 1, Column: 5, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 1 + TotalSize: 4 + TotalLifetime: 2 + TotalLifetimeAccessDensity: 12500000000 + - Callstack: + - { Function: 0x7f8d88fcc70a347b, LineOffset: 2, Column: 16, IsInlineFrame: false } + - { Function: 0xdb956436e78dd5fa, LineOffset: 1, Column: 5, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 1 + TotalSize: 4 + TotalLifetime: 2 + TotalLifetimeAccessDensity: 0 + - GUID: 0xdb956436e78dd5fa + CallSites: + - Frames: + - { Function: 0xdb956436e78dd5fa, LineOffset: 1, Column: 5, IsInlineFrame: false } +... 
+ +;--- memprof_loop_unroll.ll ; ModuleID = 'memprof_loop_unroll_b.cc' source_filename = "memprof_loop_unroll_b.cc" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll index d16843c..6629b12 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll @@ -1,21 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s -define ptr @test(ptr %d) { +define ptr @test(ptr %d, i64 %v) { ; CHECK-LABEL: define ptr @test( -; CHECK-SAME: ptr [[D:%.*]]) { +; CHECK-SAME: ptr [[D:%.*]], i64 [[V:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr null, align 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[D]], align 1 ; CHECK-NEXT: [[CMP4_2:%.*]] = icmp eq i8 [[TMP0]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP4_2]], i64 0, i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = xor i64 0, 0 -; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 1, 0 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP4_2]], i64 0, i64 4 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 0, [[V]] +; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 1, [[V]] ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x i64> poison, i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x i64> [[TMP5]], i64 [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <6 x i64> [[TMP6]], i64 [[TMP4]], i32 4 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <6 x i64> [[TMP7]], <6 x i64> poison, <6 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4> -; CHECK-NEXT: [[TMP9:%.*]] = mul <6 x i64> [[TMP8]], <i64 2, i64 6, i64 1, i64 1, i64 1, i64 0> +; CHECK-NEXT: [[TMP9:%.*]] = mul <6 x i64> [[TMP8]], <i64 2, i64 6, i64 4, i64 3, i64 5, i64 4> ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <6 x i64> [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <6 x i64> [[TMP9]], i32 1 @@ -31,23 +31,23 @@ define ptr @test(ptr %d) { ; CHECK-NEXT: ret ptr [[TMP20]] ; entry: - %0 = load i8, ptr null, align 1 + %0 = load i8, ptr %d, align 1 %cmp4.2 = icmp eq i8 %0, 0 - %1 = select i1 %cmp4.2, i64 0, i64 0 + %1 = select i1 %cmp4.2, i64 0, i64 4 %2 = shl i64 %1, 1 %3 = getelementptr i8, ptr %d, i64 %2 - %4 = xor i64 0, 0 - %5 = udiv i64 %4, 0 + %4 = xor i64 0, %v + %5 = udiv i64 %4, 3 %6 = mul i64 %5, 6 %7 = getelementptr i8, ptr %d, i64 %6 - %8 = shl i64 %1, 0 + %8 = shl i64 %1, 2 %scevgep42 = getelementptr i8, ptr %d, i64 %8 - %9 = mul i64 %5, 1 + %9 = mul i64 %5, 3 %10 = getelementptr i8, ptr %d, i64 %9 - %11 = udiv i64 1, 0 - %12 = mul i64 %11, 1 + %11 = udiv i64 1, %v + %12 = mul i64 %11, 5 %13 = getelementptr i8, ptr %d, i64 %12 - %14 = mul i64 %11, 0 + %14 = mul i64 %11, 4 %15 = getelementptr i8, ptr %d, i64 %14 ret ptr %15 } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll new file mode 100644 index 0000000..959b235 --- /dev/null +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define float @test(i8 %0) { +; CHECK-LABEL: define float @test( +; CHECK-SAME: i8 [[TMP0:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> <i8 poison, i8 0>, i8 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], <i32 2, i32 27> +; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP2]], <i32 2, i32 27> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: switch i32 [[TMP8]], label %[[EXIT:.*]] [ +; CHECK-NEXT: i32 0, label %[[EXIT]] +; CHECK-NEXT: i32 1, label %[[EXIT]] +; CHECK-NEXT: ] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret float 0.000000e+00 +; +entry: + %1 = sext i8 0 to i32 + %2 = lshr i32 %1, 27 + %3 = sext i8 %0 to i32 + %reass.add.epil = mul i32 %3, 2 + %4 = or i32 %reass.add.epil, %2 + switch i32 %4, label %exit [ + i32 0, label %exit + i32 1, label %exit + ] + +exit: + ret float 0.000000e+00 +} diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll index aa95b3f..d818335 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll @@ -1,8 +1,13 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 ; RUN: opt -passes='simplifycfg<switch-to-lookup>' -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" +;. +; CHECK: @switch.table.switch_of_powers_two = private unnamed_addr constant [7 x i32] [i32 3, i32 poison, i32 poison, i32 2, i32 1, i32 0, i32 42], align 4 +; CHECK: @switch.table.switch_of_powers_two_default_reachable = private unnamed_addr constant [7 x i32] [i32 3, i32 5, i32 5, i32 2, i32 1, i32 0, i32 42], align 4 +; CHECK: @switch.table.switch_of_powers_two_default_reachable_multipreds = private unnamed_addr constant [7 x i32] [i32 3, i32 poison, i32 poison, i32 2, i32 1, i32 0, i32 42], align 4 +;. 
define i32 @switch_of_powers_two(i32 %arg) { ; CHECK-LABEL: define i32 @switch_of_powers_two( ; CHECK-SAME: i32 [[ARG:%.*]]) { @@ -35,17 +40,17 @@ return: ret i32 %phi } -define i32 @switch_of_powers_two_default_reachable(i32 %arg) { +define i32 @switch_of_powers_two_default_reachable(i32 %arg) !prof !0 { ; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable( -; CHECK-SAME: i32 [[ARG:%.*]]) { +; CHECK-SAME: i32 [[ARG:%.*]]) !prof [[PROF0:![0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]]) ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1 -; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]] +; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]], !prof [[PROF1:![0-9]+]] ; CHECK: [[ENTRY_SPLIT]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true) ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7 -; CHECK-NEXT: br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]], !prof [[PROF2:![0-9]+]] ; CHECK: [[SWITCH_LOOKUP]]: ; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64 ; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable, i64 0, i64 [[TMP4]] @@ -62,7 +67,7 @@ entry: i32 16, label %bb3 i32 32, label %bb4 i32 64, label %bb5 - ] + ], !prof !1 default_case: br label %return bb1: br label %return @@ -128,3 +133,13 @@ return: %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ %pn, %default_case ] ret i32 %phi } + +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 10, i32 5, i32 7, i32 11, i32 13, i32 17} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +;. +; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 58, i32 5} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 56, i32 5} +;. diff --git a/llvm/test/Transforms/SimplifyCFG/pr165301.ll b/llvm/test/Transforms/SimplifyCFG/pr165301.ll index 4a539d7..1df6552 100644 --- a/llvm/test/Transforms/SimplifyCFG/pr165301.ll +++ b/llvm/test/Transforms/SimplifyCFG/pr165301.ll @@ -1,11 +1,11 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; RUN: opt -S -passes="simplifycfg<switch-range-to-icmp>" < %s | FileCheck %s ; Make sure there's no use after free when removing incoming values from PHI nodes -define i32 @pr165301(i1 %cond) { +define i32 @pr165301(i1 %cond) !prof !0 { ; CHECK-LABEL: define i32 @pr165301( -; CHECK-SAME: i1 [[COND:%.*]]) { +; CHECK-SAME: i1 [[COND:%.*]]) !prof [[PROF0:![0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[SWITCHBB:.*]] ; CHECK: [[SWITCHBB]]: @@ -18,9 +18,14 @@ switchbb: switch i1 %cond, label %default [ i1 false, label %switchbb i1 true, label %switchbb - ] + ], !prof !1 default: %phi.lcssa = phi i32 [ 0, %switchbb ] ret i32 %phi.lcssa } +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 2, i32 3, i32 5} +;. +; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10} +;. 
diff --git a/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll b/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll index 0be13ee..f024106 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments ; RUN: opt -S -passes=print-predicateinfo < %s 2>&1 >/dev/null | FileCheck %s ; FIXME: RenamedOp should be %cmp or %x in all cases here, @@ -9,25 +9,25 @@ define i32 @test(i32 %x) { ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK: RenamedOp: [[CMP]] -; CHECK: [[CMP_0:%.*]] = bitcast i1 [[CMP]] to i1 -; CHECK: RenamedOp: [[X]] -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK-NEXT: br i1 [[CMP]], label [[BB2:%.*]], label [[EXIT1:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB1]],label [[BB2:%.*]]], RenamedOp: [[CMP]] } +; CHECK-NEXT: [[CMP_0:%.*]] = bitcast i1 [[CMP]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB1]],label [[BB2]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: br i1 [[CMP]], label [[BB2]], label [[EXIT1:%.*]] ; CHECK: bb2: -; CHECK: RenamedOp: [[CMP_0]] -; CHECK: [[CMP_0_1:%.*]] = bitcast i1 [[CMP_0]] to i1 -; CHECK: RenamedOp: [[X]] -; CHECK: [[X_0_1:%.*]] = bitcast i32 [[X_0]] to i32 -; CHECK: RenamedOp: [[X_0]] -; CHECK: [[X_0_4:%.*]] = bitcast i32 [[X_0]] to i32 -; CHECK-NEXT: br i1 [[CMP_0]], label [[BB3:%.*]], label [[EXIT2:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB2]],label [[BB3:%.*]]], RenamedOp: [[CMP_0]] } +; CHECK-NEXT: [[CMP_0_1:%.*]] = bitcast i1 [[CMP_0]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB2]],label [[BB3]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0_1:%.*]] = bitcast i32 [[X_0]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB2]],label [[EXIT2:%.*]]], RenamedOp: [[X_0]] } +; CHECK-NEXT: [[X_0_4:%.*]] = bitcast i32 [[X_0]] to i32 +; CHECK-NEXT: br i1 [[CMP_0]], label [[BB3]], label [[EXIT2]] ; CHECK: bb3: -; CHECK: RenamedOp: [[X]] -; CHECK: [[X_0_1_2:%.*]] = bitcast i32 [[X_0_1]] to i32 -; CHECK: RenamedOp: [[X_0_1]] -; CHECK: [[X_0_1_3:%.*]] = bitcast i32 [[X_0_1]] to i32 -; CHECK-NEXT: br i1 [[CMP_0_1]], label [[EXIT3:%.*]], label [[EXIT4:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB3]],label [[EXIT3:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0_1_2:%.*]] = bitcast i32 [[X_0_1]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB3]],label [[EXIT4:%.*]]], RenamedOp: [[X_0_1]] } +; CHECK-NEXT: [[X_0_1_3:%.*]] = bitcast i32 [[X_0_1]] to i32 +; CHECK-NEXT: br i1 [[CMP_0_1]], label [[EXIT3]], label [[EXIT4]] ; CHECK: exit1: ; CHECK-NEXT: ret i32 0 ; CHECK: exit2: diff --git a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll index 256d0d9..42e8ccb 100644 --- 
a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments ; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s @a = external global i32 ; <ptr> [#uses=7] @@ -98,12 +98,17 @@ define void @test3(i32 %x, i32 %y) { ; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]] -; CHECK: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1 -; CHECK: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1 -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1 -; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 -; CHECK-NEXT: br i1 [[Z]], label [[BOTH_ZERO:%.*]], label [[NOPE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = and i1 [[XZ]], [[YZ]] Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[Z]] } +; CHECK-NEXT: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH_ZERO:%.*]]], RenamedOp: [[XZ]] } +; CHECK-NEXT: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH_ZERO]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH_ZERO]]], RenamedOp: [[YZ]] } +; CHECK-NEXT: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH_ZERO]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 +; CHECK-NEXT: br i1 [[Z]], label [[BOTH_ZERO]], label [[NOPE]] ; CHECK: both_zero: ; CHECK-NEXT: call void @foo(i1 [[XZ_0]]) ; CHECK-NEXT: call void @foo(i1 [[YZ_0]]) @@ -133,10 +138,11 @@ define void @test4(i1 %b, i32 %x) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: br i1 [[B:%.*]], label [[SW:%.*]], label [[CASE3:%.*]] ; CHECK: sw: -; CHECK: [[X_0:%.*]] = bitcast i32 [[X:%.*]] to i32 +; CHECK-NEXT: ; switch predicate info { CaseValue: i32 1 Edge: [label [[SW]],label [[CASE1:%.*]]], RenamedOp: [[X:%.*]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 ; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT:%.*]] [ ; CHECK-NEXT: i32 0, label [[CASE0:%.*]] -; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1]] ; CHECK-NEXT: i32 2, label [[CASE0]] ; CHECK-NEXT: i32 3, label [[CASE3]] ; CHECK-NEXT: i32 4, label [[DEFAULT]] @@ -180,11 +186,15 @@ case3: define i1 @test5(i32 %x, i32 %y) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK: [[X_1:%.*]] = bitcast i32 [[X]] to i32 -; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 -; CHECK: [[Y_1:%.*]] = bitcast i32 [[Y]] to i32 -; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], [[Y]] Edge: [label [[TMP0:%.*]],label [[SAME:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: ; branch predicate info { 
TrueEdge: 0 Comparison: [[CMP]] = icmp eq i32 [[X]], [[Y]] Edge: [label [[TMP0]],label [[DIFFERENT:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_1:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], [[Y]] Edge: [label [[TMP0]],label [[SAME]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp eq i32 [[X]], [[Y]] Edge: [label [[TMP0]],label [[DIFFERENT]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_1:%.*]] = bitcast i32 [[Y]] to i32 +; CHECK-NEXT: br i1 [[CMP]], label [[SAME]], label [[DIFFERENT]] ; CHECK: same: ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[X_0]], [[Y_0]] ; CHECK-NEXT: ret i1 [[CMP2]] @@ -253,11 +263,15 @@ different: define i1 @test7(i32 %x, i32 %y) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]] -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK: [[X_1:%.*]] = bitcast i32 [[X]] to i32 -; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 -; CHECK: [[Y_1:%.*]] = bitcast i32 [[Y]] to i32 -; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp sgt i32 [[X]], [[Y]] Edge: [label [[TMP0:%.*]],label [[SAME:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp sgt i32 [[X]], [[Y]] Edge: [label [[TMP0]],label [[DIFFERENT:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_1:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp sgt i32 [[X]], [[Y]] Edge: [label [[TMP0]],label [[SAME]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp sgt i32 [[X]], [[Y]] Edge: [label [[TMP0]],label [[DIFFERENT]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_1:%.*]] = bitcast i32 [[Y]] to i32 +; CHECK-NEXT: br i1 [[CMP]], label [[SAME]], label [[DIFFERENT]] ; CHECK: same: ; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[X_0]], [[Y_0]] ; CHECK-NEXT: ret i1 [[CMP2]] @@ -280,11 +294,15 @@ different: define i1 @test7_fp(float %x, float %y) { ; CHECK-LABEL: @test7_fp( ; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]] -; CHECK: [[X_0:%.*]] = bitcast float [[X]] to float -; CHECK: [[X_1:%.*]] = bitcast float [[X]] to float -; CHECK: [[Y_0:%.*]] = bitcast float [[Y]] to float -; CHECK: [[Y_1:%.*]] = bitcast float [[Y]] to float -; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = fcmp ogt float [[X]], [[Y]] Edge: [label [[TMP0:%.*]],label [[SAME:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast float [[X]] to float +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = fcmp ogt float [[X]], [[Y]] Edge: [label [[TMP0]],label [[DIFFERENT:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_1:%.*]] = bitcast float [[X]] to float +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = fcmp ogt float [[X]], [[Y]] Edge: [label [[TMP0]],label [[SAME]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_0:%.*]] = bitcast float [[Y]] to float +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = fcmp ogt float [[X]], [[Y]] Edge: [label [[TMP0]],label [[DIFFERENT]]], RenamedOp: [[Y]] } +; CHECK-NEXT: 
[[Y_1:%.*]] = bitcast float [[Y]] to float +; CHECK-NEXT: br i1 [[CMP]], label [[SAME]], label [[DIFFERENT]] ; CHECK: same: ; CHECK-NEXT: [[CMP2:%.*]] = fcmp ule float [[X_0]], [[Y_0]] ; CHECK-NEXT: ret i1 [[CMP2]] @@ -353,9 +371,11 @@ different: define i32 @test9(i32 %i, i32 %j) { ; CHECK-LABEL: @test9( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]] -; CHECK: [[I_0:%.*]] = bitcast i32 [[I]] to i32 -; CHECK: [[J_0:%.*]] = bitcast i32 [[J]] to i32 -; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[I]], [[J]] Edge: [label [[TMP0:%.*]],label [[COND_TRUE:%.*]]], RenamedOp: [[I]] } +; CHECK-NEXT: [[I_0:%.*]] = bitcast i32 [[I]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[I]], [[J]] Edge: [label [[TMP0]],label [[COND_TRUE]]], RenamedOp: [[J]] } +; CHECK-NEXT: [[J_0:%.*]] = bitcast i32 [[J]] to i32 +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE]], label [[RET:%.*]] ; CHECK: cond_true: ; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[I_0]], [[J_0]] ; CHECK-NEXT: ret i32 [[DIFF]] @@ -376,9 +396,11 @@ ret: define i32 @test10(i32 %j, i32 %i) { ; CHECK-LABEL: @test10( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]] -; CHECK: [[I_0:%.*]] = bitcast i32 [[I]] to i32 -; CHECK: [[J_0:%.*]] = bitcast i32 [[J]] to i32 -; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[I]], [[J]] Edge: [label [[TMP0:%.*]],label [[COND_TRUE:%.*]]], RenamedOp: [[I]] } +; CHECK-NEXT: [[I_0:%.*]] = bitcast i32 [[I]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[I]], [[J]] Edge: [label [[TMP0]],label [[COND_TRUE]]], RenamedOp: [[J]] } +; CHECK-NEXT: [[J_0:%.*]] = bitcast i32 [[J]] to i32 +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE]], label [[RET:%.*]] ; CHECK: cond_true: ; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[I_0]], [[J_0]] ; CHECK-NEXT: ret i32 [[DIFF]] @@ -403,15 +425,18 @@ define i32 @test11(i32 %x) { ; CHECK-NEXT: [[V0:%.*]] = call i32 @yogibar() ; CHECK-NEXT: [[V1:%.*]] = call i32 @yogibar() ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V0]], [[V1]] -; CHECK: [[V0_0:%.*]] = bitcast i32 [[V0]] to i32 -; CHECK: [[V1_0:%.*]] = bitcast i32 [[V1]] to i32 -; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[NEXT:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp eq i32 [[V0]], [[V1]] Edge: [label [[TMP0:%.*]],label [[NEXT:%.*]]], RenamedOp: [[V0]] } +; CHECK-NEXT: [[V0_0:%.*]] = bitcast i32 [[V0]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[V0]], [[V1]] Edge: [label [[TMP0]],label [[COND_TRUE:%.*]]], RenamedOp: [[V1]] } +; CHECK-NEXT: [[V1_0:%.*]] = bitcast i32 [[V1]] to i32 +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE]], label [[NEXT]] ; CHECK: cond_true: ; CHECK-NEXT: ret i32 [[V1_0]] ; CHECK: next: ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X:%.*]], [[V0_0]] -; CHECK: [[V0_0_1:%.*]] = bitcast i32 [[V0_0]] to i32 -; CHECK-NEXT: br i1 [[CMP2]], label [[COND_TRUE2:%.*]], label [[NEXT2:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP2]] = icmp eq i32 [[X]], [[V0_0]] Edge: [label [[NEXT]],label [[COND_TRUE2:%.*]]], RenamedOp: [[V0_0]] } +; CHECK-NEXT: [[V0_0_1:%.*]] = bitcast i32 [[V0_0]] to i32 +; CHECK-NEXT: br i1 [[CMP2]], label [[COND_TRUE2]], label [[NEXT2:%.*]] ; 
CHECK: cond_true2: ; CHECK-NEXT: ret i32 [[V0_0_1]] ; CHECK: next2: @@ -439,9 +464,11 @@ next2: define i32 @test12(i32 %x) { ; CHECK-LABEL: @test12( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK: [[X_1:%.*]] = bitcast i32 [[X]] to i32 -; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0:%.*]],label [[COND_TRUE:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[COND_FALSE:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_1:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE]], label [[COND_FALSE]] ; CHECK: cond_true: ; CHECK-NEXT: br label [[RET:%.*]] ; CHECK: cond_false: diff --git a/llvm/test/Transforms/Util/PredicateInfo/diamond.ll b/llvm/test/Transforms/Util/PredicateInfo/diamond.ll index ac2c9a1..06c02d6 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/diamond.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/diamond.ll @@ -1,16 +1,18 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=print-predicateinfo < %s 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments +; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s define i1 @f(i32 %x, i1 %y) { ; CHECK-LABEL: @f( ; CHECK-NEXT: br i1 [[Y:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0 -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK-NEXT: br i1 [[CMP]], label [[BB2:%.*]], label [[BB3:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp sge i32 [[X]], 0 Edge: [label [[BB0]],label [[BB2:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: br i1 [[CMP]], label [[BB2]], label [[BB3:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[X2:%.*]] = add nuw nsw i32 [[X]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X2]], 2 -; CHECK: [[X2_0:%.*]] = bitcast i32 [[X2]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP2]] = icmp sge i32 [[X2]], 2 Edge: [label [[BB1]],label [[BB2]]], RenamedOp: [[X2]] } +; CHECK-NEXT: [[X2_0:%.*]] = bitcast i32 [[X2]] to i32 ; CHECK-NEXT: br i1 [[CMP2]], label [[BB2]], label [[BB3]] ; CHECK: bb2: ; CHECK-NEXT: [[X3:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ [[X2_0]], [[BB1]] ] @@ -38,12 +40,14 @@ define i1 @g(i32 %x, i1 %y) { ; CHECK-NEXT: br i1 [[Y:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0 -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK-NEXT: br i1 [[CMP]], label [[BB3:%.*]], label [[BB2:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp sge i32 [[X]], 0 Edge: [label [[BB0]],label [[BB2:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: br i1 [[CMP]], label [[BB3:%.*]], label [[BB2]] ; CHECK: bb1: ; CHECK-NEXT: [[X2:%.*]] = add nuw nsw i32 [[X]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X2]], 2 -; CHECK: [[X2_0:%.*]] = bitcast i32 [[X2]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP2]] = icmp sge i32 [[X2]], 2 Edge: [label [[BB1]],label 
[[BB2]]], RenamedOp: [[X2]] } +; CHECK-NEXT: [[X2_0:%.*]] = bitcast i32 [[X2]] to i32 ; CHECK-NEXT: br i1 [[CMP2]], label [[BB3]], label [[BB2]] ; CHECK: bb2: ; CHECK-NEXT: [[X3:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ [[X2_0]], [[BB1]] ] diff --git a/llvm/test/Transforms/Util/PredicateInfo/edge.ll b/llvm/test/Transforms/Util/PredicateInfo/edge.ll index ef757f3..9138326 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/edge.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/edge.ll @@ -1,16 +1,17 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=print-predicateinfo < %s 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments +; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s define i32 @f1(i32 %x) { ; CHECK-LABEL: @f1( ; CHECK-NEXT: bb0: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK-NEXT: br i1 [[CMP]], label [[BB2:%.*]], label [[BB1:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB0:%.*]],label [[BB2:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: br i1 [[CMP]], label [[BB2]], label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[X_0]], [[BB0:%.*]] ], [ 0, [[BB1]] ] +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ 0, [[BB1]] ] ; CHECK-NEXT: [[FOO:%.*]] = add i32 [[COND]], [[X]] ; CHECK-NEXT: ret i32 [[FOO]] ; @@ -29,12 +30,13 @@ define i32 @f2(i32 %x) { ; CHECK-LABEL: @f2( ; CHECK-NEXT: bb0: ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X:%.*]], 0 -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK-NEXT: br i1 [[CMP]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp ne i32 [[X]], 0 Edge: [label [[BB0:%.*]],label [[BB2:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: br i1 [[CMP]], label [[BB1:%.*]], label [[BB2]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[X_0]], [[BB0:%.*]] ], [ 0, [[BB1]] ] +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ 0, [[BB1]] ] ; CHECK-NEXT: [[FOO:%.*]] = add i32 [[COND]], [[X]] ; CHECK-NEXT: ret i32 [[FOO]] ; @@ -52,14 +54,15 @@ bb2: define i32 @f3(i32 %x) { ; CHECK-LABEL: @f3( ; CHECK-NEXT: bb0: -; CHECK: [[X_0:%.*]] = bitcast i32 [[X:%.*]] to i32 +; CHECK-NEXT: ; switch predicate info { CaseValue: i32 0 Edge: [label [[BB0:%.*]],label [[BB2:%.*]]], RenamedOp: [[X:%.*]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 ; CHECK-NEXT: switch i32 [[X]], label [[BB1:%.*]] [ -; CHECK-NEXT: i32 0, label [[BB2:%.*]] +; CHECK-NEXT: i32 0, label [[BB2]] ; CHECK-NEXT: ] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[X_0]], [[BB0:%.*]] ], [ 0, [[BB1]] ] +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ 0, [[BB1]] ] ; CHECK-NEXT: [[FOO:%.*]] = add i32 [[COND]], [[X]] ; CHECK-NEXT: ret i32 [[FOO]] ; @@ -78,13 +81,14 @@ define double @fcmp_oeq_not_zero(double %x, double %y) { ; CHECK-LABEL: @fcmp_oeq_not_zero( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[Y:%.*]], 2.000000e+00 -; CHECK: [[Y_0:%.*]] = bitcast double [[Y]] to double -; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[RETURN:%.*]] 
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = fcmp oeq double [[Y]], 2.000000e+00 Edge: [label [[ENTRY:%.*]],label [[IF:%.*]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_0:%.*]] = bitcast double [[Y]] to double +; CHECK-NEXT: br i1 [[CMP]], label [[IF]], label [[RETURN:%.*]] ; CHECK: if: ; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[X:%.*]], [[Y_0]] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: -; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY]] ] ; CHECK-NEXT: ret double [[RETVAL]] ; entry: @@ -105,13 +109,14 @@ define double @fcmp_une_not_zero(double %x, double %y) { ; CHECK-LABEL: @fcmp_une_not_zero( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[Y:%.*]], 2.000000e+00 -; CHECK: [[Y_0:%.*]] = bitcast double [[Y]] to double -; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = fcmp une double [[Y]], 2.000000e+00 Edge: [label [[ENTRY:%.*]],label [[ELSE:%.*]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_0:%.*]] = bitcast double [[Y]] to double +; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE]] ; CHECK: else: ; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[X:%.*]], [[Y_0]] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: -; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY]] ] ; CHECK-NEXT: ret double [[RETVAL]] ; entry: @@ -132,13 +137,14 @@ define double @fcmp_oeq_zero(double %x, double %y) { ; CHECK-LABEL: @fcmp_oeq_zero( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[Y:%.*]], 0.000000e+00 -; CHECK: [[Y_0:%.*]] = bitcast double [[Y]] to double -; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = fcmp oeq double [[Y]], 0.000000e+00 Edge: [label [[ENTRY:%.*]],label [[IF:%.*]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_0:%.*]] = bitcast double [[Y]] to double +; CHECK-NEXT: br i1 [[CMP]], label [[IF]], label [[RETURN:%.*]] ; CHECK: if: ; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[X:%.*]], [[Y_0]] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: -; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY]] ] ; CHECK-NEXT: ret double [[RETVAL]] ; entry: @@ -159,13 +165,14 @@ define double @fcmp_une_zero(double %x, double %y) { ; CHECK-LABEL: @fcmp_une_zero( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[Y:%.*]], -0.000000e+00 -; CHECK: [[Y_0:%.*]] = bitcast double [[Y]] to double -; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = fcmp une double [[Y]], -0.000000e+00 Edge: [label [[ENTRY:%.*]],label [[ELSE:%.*]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_0:%.*]] = bitcast double [[Y]] to double +; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE]] ; CHECK: else: ; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[X:%.*]], [[Y_0]] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: -; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY]] ] ; CHECK-NEXT: ret double [[RETVAL]] 
; entry: @@ -188,13 +195,14 @@ define double @fcmp_oeq_maybe_zero(double %x, double %y, double %z1, double %z2) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Z:%.*]] = fadd double [[Z1:%.*]], [[Z2:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[Y:%.*]], [[Z]] -; CHECK: [[Z_0:%.*]] = bitcast double [[Z]] to double -; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = fcmp oeq double [[Y]], [[Z]] Edge: [label [[ENTRY:%.*]],label [[IF:%.*]]], RenamedOp: [[Z]] } +; CHECK-NEXT: [[Z_0:%.*]] = bitcast double [[Z]] to double +; CHECK-NEXT: br i1 [[CMP]], label [[IF]], label [[RETURN:%.*]] ; CHECK: if: ; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[X:%.*]], [[Z_0]] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: -; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY]] ] ; CHECK-NEXT: ret double [[RETVAL]] ; entry: @@ -217,13 +225,14 @@ define double @fcmp_une_maybe_zero(double %x, double %y, double %z1, double %z2) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Z:%.*]] = fadd double [[Z1:%.*]], [[Z2:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[Y:%.*]], [[Z]] -; CHECK: [[Z_0:%.*]] = bitcast double [[Z]] to double -; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = fcmp une double [[Y]], [[Z]] Edge: [label [[ENTRY:%.*]],label [[ELSE:%.*]]], RenamedOp: [[Z]] } +; CHECK-NEXT: [[Z_0:%.*]] = bitcast double [[Z]] to double +; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE]] ; CHECK: else: ; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[X:%.*]], [[Z_0]] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: -; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY]] ] ; CHECK-NEXT: ret double [[RETVAL]] ; entry: diff --git a/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll b/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll index 36eaf6e..4762d37 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=print-predicateinfo < %s 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments +; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s ; Don't insert predicate info for conditions with a single target. 
@a = global i32 1, align 4 @d = common global i32 0, align 4 @@ -12,22 +12,27 @@ define i32 @main() { ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @d, align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP13:%.*]] -; CHECK: [[TMP4:%.*]] = load i32, ptr @a, align 4 +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr @a, align 4 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr @c, align 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 ; CHECK-NEXT: br i1 [[TMP6]], label [[TMP7:%.*]], label [[TMP9:%.*]] -; CHECK: [[TMP8:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP4]], 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9]], label [[TMP9]] -; CHECK: [[DOT0:%.*]] = phi i32 [ [[TMP4]], [[TMP7]] ], [ [[TMP4]], [[TMP7]] ], [ [[DOT1:%.*]], [[TMP13]] ], [ [[TMP4]], [[TMP3]] ] +; CHECK: 9: +; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP4]], [[TMP7]] ], [ [[TMP4]], [[TMP7]] ], [ [[DOT1:%.*]], [[TMP13]] ], [ [[TMP4]], [[TMP3]] ] ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr @b, align 4 ; CHECK-NEXT: [[TMP11:%.*]] = sdiv i32 [[TMP10]], [[DOT0]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label [[TMP13]], label [[TMP13]] -; CHECK: [[DOT1]] = phi i32 [ [[DOT0]], [[TMP9]] ], [ [[DOT0]], [[TMP9]] ], [ undef, [[TMP0:%.*]] ] +; CHECK: 13: +; CHECK-NEXT: [[DOT1]] = phi i32 [ [[DOT0]], [[TMP9]] ], [ [[DOT0]], [[TMP9]] ], [ undef, [[TMP0:%.*]] ] ; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr @e, align 4 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0 ; CHECK-NEXT: br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP9]] -; CHECK: ret i32 0 +; CHECK: 16: +; CHECK-NEXT: ret i32 0 ; %1 = load i32, ptr @d, align 4 %2 = icmp eq i32 %1, 0 diff --git a/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll b/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll index bc1d39f..e4fd4cc 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=print-predicateinfo < %s 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments +; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s ; Don't insert predicate info for conditions with a single target. @a = global i32 6, align 4 @c = global i32 -1, align 4 @@ -13,26 +13,32 @@ define i32 @main() { ; CHECK-LABEL: @main( ; CHECK-NEXT: store i32 6, ptr @e, align 4 ; CHECK-NEXT: br label [[TMP1:%.*]] -; CHECK: [[TMP2:%.*]] = load i32, ptr @d, align 4 +; CHECK: 1: +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @d, align 4 ; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [6 x i32], ptr @b, i64 0, i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 (ptr, ...) 
@printf(ptr @.str, i32 [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr @a, align 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[TMP8]], label %thread-pre-split, label [[TMP9:%.*]] -; CHECK: [[TMP10:%.*]] = load i32, ptr @e, align 4 +; CHECK-NEXT: br i1 [[TMP8]], label [[THREAD_PRE_SPLIT:%.*]], label [[TMP9:%.*]] +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr @e, align 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 ; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP12]] ; CHECK: thread-pre-split: ; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr @e, align 4 ; CHECK-NEXT: br label [[TMP12]] -; CHECK: [[TMP13:%.*]] = phi i32 [ [[DOTPR]], %thread-pre-split ], [ [[TMP10]], [[TMP9]] ], [ [[TMP10]], [[TMP9]] ] +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[DOTPR]], [[THREAD_PRE_SPLIT]] ], [ [[TMP10]], [[TMP9]] ], [ [[TMP10]], [[TMP9]] ] ; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 ; CHECK-NEXT: br i1 [[TMP14]], label [[TMP15:%.*]], label [[TMP15]] -; CHECK: br i1 [[TMP14]], label [[TMP16:%.*]], label [[TMP17:%.*]] -; CHECK: br label [[TMP17]] -; CHECK: [[DOT0:%.*]] = phi i32 [ 1, [[TMP16]] ], [ -1, [[TMP15]] ] +; CHECK: 15: +; CHECK-NEXT: br i1 [[TMP14]], label [[TMP16:%.*]], label [[TMP17:%.*]] +; CHECK: 16: +; CHECK-NEXT: br label [[TMP17]] +; CHECK: 17: +; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ 1, [[TMP16]] ], [ -1, [[TMP15]] ] ; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[DOT0]], 8693 ; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr @c, align 4 ; CHECK-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] @@ -40,7 +46,8 @@ define i32 @main() { ; CHECK-NEXT: store i32 [[TMP21]], ptr @d, align 4 ; CHECK-NEXT: [[TMP22:%.*]] = icmp slt i32 [[TMP20]], -2 ; CHECK-NEXT: br i1 [[TMP22]], label [[TMP1]], label [[TMP23:%.*]] -; CHECK: ret i32 0 +; CHECK: 23: +; CHECK-NEXT: ret i32 0 ; store i32 6, ptr @e, align 4 br label %1 diff --git a/llvm/test/Transforms/Util/PredicateInfo/testandor.ll b/llvm/test/Transforms/Util/PredicateInfo/testandor.ll index cc1dc4e..d29aadd 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/testandor.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/testandor.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments ; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s declare void @foo(i1) @@ -10,12 +10,17 @@ define void @test_or(i32 %x, i32 %y) { ; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[Z:%.*]] = or i1 [[XZ]], [[YZ]] -; CHECK: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1 -; CHECK: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1 -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1 -; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 -; CHECK-NEXT: br i1 [[Z]], label [[ONEOF:%.*]], label [[NEITHER:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = or i1 [[XZ]], [[YZ]] Edge: [label [[TMP0:%.*]],label [[NEITHER:%.*]]], RenamedOp: [[Z]] } +; CHECK-NEXT: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[XZ]] } +; CHECK-NEXT: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label 
[[NEITHER]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[YZ]] } +; CHECK-NEXT: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 +; CHECK-NEXT: br i1 [[Z]], label [[ONEOF:%.*]], label [[NEITHER]] ; CHECK: oneof: ; CHECK-NEXT: call void @foo(i1 [[XZ]]) ; CHECK-NEXT: call void @foo(i1 [[YZ]]) @@ -55,12 +60,17 @@ define void @test_or_logical(i32 %x, i32 %y) { ; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[Z:%.*]] = select i1 [[XZ]], i1 true, i1 [[YZ]] -; CHECK: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1 -; CHECK: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1 -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1 -; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 -; CHECK-NEXT: br i1 [[Z]], label [[ONEOF:%.*]], label [[NEITHER:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = select i1 [[XZ]], i1 true, i1 [[YZ]] Edge: [label [[TMP0:%.*]],label [[NEITHER:%.*]]], RenamedOp: [[Z]] } +; CHECK-NEXT: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[XZ]] } +; CHECK-NEXT: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[YZ]] } +; CHECK-NEXT: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 +; CHECK-NEXT: br i1 [[Z]], label [[ONEOF:%.*]], label [[NEITHER]] ; CHECK: oneof: ; CHECK-NEXT: call void @foo(i1 [[XZ]]) ; CHECK-NEXT: call void @foo(i1 [[YZ]]) @@ -100,12 +110,17 @@ define void @test_and(i32 %x, i32 %y) { ; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]] -; CHECK: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1 -; CHECK: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1 -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1 -; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 -; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = and i1 [[XZ]], [[YZ]] Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[Z]] } +; CHECK-NEXT: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH:%.*]]], RenamedOp: [[XZ]] } +; CHECK-NEXT: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[X]] 
} +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[YZ]] } +; CHECK-NEXT: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 +; CHECK-NEXT: br i1 [[Z]], label [[BOTH]], label [[NOPE]] ; CHECK: both: ; CHECK-NEXT: call void @foo(i1 [[XZ_0]]) ; CHECK-NEXT: call void @foo(i1 [[YZ_0]]) @@ -145,12 +160,17 @@ define void @test_and_logical(i32 %x, i32 %y) { ; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[Z:%.*]] = select i1 [[XZ]], i1 [[YZ]], i1 false -; CHECK: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1 -; CHECK: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1 -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1 -; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 -; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = select i1 [[XZ]], i1 [[YZ]], i1 false Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[Z]] } +; CHECK-NEXT: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH:%.*]]], RenamedOp: [[XZ]] } +; CHECK-NEXT: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[YZ]] } +; CHECK-NEXT: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32 +; CHECK-NEXT: br i1 [[Z]], label [[BOTH]], label [[NOPE]] ; CHECK: both: ; CHECK-NEXT: call void @foo(i1 [[XZ_0]]) ; CHECK-NEXT: call void @foo(i1 [[YZ_0]]) @@ -190,12 +210,17 @@ define void @testandsame(i32 %x, i32 %y) { ; CHECK-NEXT: [[XGT:%.*]] = icmp sgt i32 [[X:%.*]], 0 ; CHECK-NEXT: [[XLT:%.*]] = icmp slt i32 [[X]], 100 ; CHECK-NEXT: [[Z:%.*]] = and i1 [[XGT]], [[XLT]] -; CHECK: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1 -; CHECK: [[XGT_0:%.*]] = bitcast i1 [[XGT]] to i1 -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK: [[X_0_1:%.*]] = bitcast i32 [[X_0]] to i32 -; CHECK: [[XLT_0:%.*]] = bitcast i1 [[XLT]] to i1 -; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = and i1 [[XGT]], [[XLT]] Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[Z]] } +; CHECK-NEXT: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XGT]] = icmp sgt i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH:%.*]]], RenamedOp: [[XGT]] } +; CHECK-NEXT: [[XGT_0:%.*]] = bitcast i1 [[XGT]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XGT]] = icmp sgt i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = 
bitcast i32 [[X]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XLT]] = icmp slt i32 [[X]], 100 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0_1:%.*]] = bitcast i32 [[X_0]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XLT]] = icmp slt i32 [[X]], 100 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[XLT]] } +; CHECK-NEXT: [[XLT_0:%.*]] = bitcast i1 [[XLT]] to i1 +; CHECK-NEXT: br i1 [[Z]], label [[BOTH]], label [[NOPE]] ; CHECK: both: ; CHECK-NEXT: call void @foo(i1 [[XGT_0]]) ; CHECK-NEXT: call void @foo(i1 [[XLT_0]]) @@ -229,17 +254,27 @@ define void @testandassume(i32 %x, i32 %y) { ; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]] ; CHECK-NEXT: call void @llvm.assume(i1 [[Z]]) -; CHECK: [[TMP1:%.*]] = bitcast i32 [[Y]] to i32 -; CHECK: [[TMP2:%.*]] = bitcast i1 [[YZ]] to i1 -; CHECK: [[TMP3:%.*]] = bitcast i32 [[X]] to i32 -; CHECK: [[TMP4:%.*]] = bitcast i1 [[XZ]] to i1 -; CHECK: [[TMP5:%.*]] = bitcast i1 [[Z]] to i1 -; CHECK: [[DOT0:%.*]] = bitcast i1 [[TMP5]] to i1 -; CHECK: [[DOT01:%.*]] = bitcast i1 [[TMP4]] to i1 -; CHECK: [[DOT02:%.*]] = bitcast i32 [[TMP3]] to i32 -; CHECK: [[DOT03:%.*]] = bitcast i1 [[TMP2]] to i1 -; CHECK: [[DOT04:%.*]] = bitcast i32 [[TMP1]] to i32 -; CHECK-NEXT: br i1 [[TMP5]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK-NEXT: ; assume predicate info { Comparison: [[YZ]] = icmp eq i32 [[Y]], 0, RenamedOp: [[Y]] } +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[Y]] to i32 +; CHECK-NEXT: ; assume predicate info { Comparison: [[YZ]] = icmp eq i32 [[Y]], 0, RenamedOp: [[YZ]] } +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[YZ]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison: [[XZ]] = icmp eq i32 [[X]], 0, RenamedOp: [[X]] } +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: ; assume predicate info { Comparison: [[XZ]] = icmp eq i32 [[X]], 0, RenamedOp: [[XZ]] } +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i1 [[XZ]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison: [[Z]] = and i1 [[XZ]], [[YZ]], RenamedOp: [[Z]] } +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i1 [[Z]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = and i1 [[XZ]], [[YZ]] Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[TMP5]] } +; CHECK-NEXT: [[DOT0:%.*]] = bitcast i1 [[TMP5]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH:%.*]]], RenamedOp: [[XZ]] } +; CHECK-NEXT: [[DOT01:%.*]] = bitcast i1 [[TMP4]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[DOT02:%.*]] = bitcast i32 [[TMP3]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[YZ]] } +; CHECK-NEXT: [[DOT03:%.*]] = bitcast i1 [[TMP2]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[Y]] } +; CHECK-NEXT: [[DOT04:%.*]] = bitcast i32 [[TMP1]] to i32 +; CHECK-NEXT: br i1 [[TMP5]], label [[BOTH]], label [[NOPE]] ; CHECK: both: ; CHECK-NEXT: call void @foo(i1 [[DOT01]]) ; CHECK-NEXT: call void @foo(i1 [[DOT03]]) @@ -274,9 +309,11 @@ define void @testorassume(i32 %x, i32 %y) { ; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; 
CHECK-NEXT: [[Z:%.*]] = or i1 [[XZ]], [[YZ]] ; CHECK-NEXT: call void @llvm.assume(i1 [[Z]]) -; CHECK: [[TMP1:%.*]] = bitcast i1 [[Z]] to i1 -; CHECK: [[DOT0:%.*]] = bitcast i1 [[TMP1]] to i1 -; CHECK-NEXT: br i1 [[TMP1]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK-NEXT: ; assume predicate info { Comparison: [[Z]] = or i1 [[XZ]], [[YZ]], RenamedOp: [[Z]] } +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[Z]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = or i1 [[XZ]], [[YZ]] Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[TMP1]] } +; CHECK-NEXT: [[DOT0:%.*]] = bitcast i1 [[TMP1]] to i1 +; CHECK-NEXT: br i1 [[TMP1]], label [[BOTH:%.*]], label [[NOPE]] ; CHECK: both: ; CHECK-NEXT: call void @foo(i1 [[XZ]]) ; CHECK-NEXT: call void @foo(i1 [[YZ]]) @@ -307,12 +344,17 @@ define void @test_and_one_unknown_cond(i32 %x, i1 %c1) { ; CHECK-LABEL: @test_and_one_unknown_cond( ; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: [[A:%.*]] = and i1 [[C1:%.*]], [[C2]] -; CHECK: [[A_0:%.*]] = bitcast i1 [[A]] to i1 -; CHECK: [[A_1:%.*]] = bitcast i1 [[A]] to i1 -; CHECK: [[C1_0:%.*]] = bitcast i1 [[C1]] to i1 -; CHECK: [[C2_0:%.*]] = bitcast i1 [[C2]] to i1 -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK-NEXT: br i1 [[A]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A]] = and i1 [[C1]], [[C2]] Edge: [label [[TMP0:%.*]],label [[BOTH:%.*]]], RenamedOp: [[A]] } +; CHECK-NEXT: [[A_0:%.*]] = bitcast i1 [[A]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A]] = and i1 [[C1]], [[C2]] Edge: [label [[TMP0]],label [[NOPE:%.*]]], RenamedOp: [[A]] } +; CHECK-NEXT: [[A_1:%.*]] = bitcast i1 [[A]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison:i1 [[C1]] Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[C1]] } +; CHECK-NEXT: [[C1_0:%.*]] = bitcast i1 [[C1]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[C2]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[C2]] } +; CHECK-NEXT: [[C2_0:%.*]] = bitcast i1 [[C2]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[C2]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: br i1 [[A]], label [[BOTH]], label [[NOPE]] ; CHECK: both: ; CHECK-NEXT: call void @bar(i32 [[X_0]]) ; CHECK-NEXT: call void @foo(i1 [[C1_0]]) @@ -349,12 +391,17 @@ define void @test_or_one_unknown_cond(i32 %x, i1 %c1) { ; CHECK-LABEL: @test_or_one_unknown_cond( ; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: [[A:%.*]] = or i1 [[C1:%.*]], [[C2]] -; CHECK: [[A_0:%.*]] = bitcast i1 [[A]] to i1 -; CHECK: [[A_1:%.*]] = bitcast i1 [[A]] to i1 -; CHECK: [[C1_0:%.*]] = bitcast i1 [[C1]] to i1 -; CHECK: [[C2_0:%.*]] = bitcast i1 [[C2]] to i1 -; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK-NEXT: br i1 [[A]], label [[NOPE:%.*]], label [[BOTH_INVERTED:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A]] = or i1 [[C1]], [[C2]] Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[A]] } +; CHECK-NEXT: [[A_0:%.*]] = bitcast i1 [[A]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A]] = or i1 [[C1]], [[C2]] Edge: [label [[TMP0]],label [[BOTH_INVERTED:%.*]]], RenamedOp: [[A]] } +; CHECK-NEXT: [[A_1:%.*]] = bitcast i1 [[A]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison:i1 [[C1]] 
Edge: [label [[TMP0]],label [[BOTH_INVERTED]]], RenamedOp: [[C1]] } +; CHECK-NEXT: [[C1_0:%.*]] = bitcast i1 [[C1]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[C2]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH_INVERTED]]], RenamedOp: [[C2]] } +; CHECK-NEXT: [[C2_0:%.*]] = bitcast i1 [[C2]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[C2]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH_INVERTED]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: br i1 [[A]], label [[NOPE]], label [[BOTH_INVERTED]] ; CHECK: both_inverted: ; CHECK-NEXT: call void @bar(i32 [[X_0]]) ; CHECK-NEXT: call void @foo(i1 [[C1_0]]) @@ -391,13 +438,19 @@ define void @test_and_chain(i1 %a, i1 %b, i1 %c) { ; CHECK-LABEL: @test_and_chain( ; CHECK-NEXT: [[AND1:%.*]] = and i1 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C:%.*]] -; CHECK: [[AND2_0:%.*]] = bitcast i1 [[AND2]] to i1 -; CHECK: [[AND2_1:%.*]] = bitcast i1 [[AND2]] to i1 -; CHECK: [[AND1_0:%.*]] = bitcast i1 [[AND1]] to i1 -; CHECK: [[A_0:%.*]] = bitcast i1 [[A]] to i1 -; CHECK: [[B_0:%.*]] = bitcast i1 [[B]] to i1 -; CHECK: [[C_0:%.*]] = bitcast i1 [[C]] to i1 -; CHECK-NEXT: br i1 [[AND2]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[AND2]] = and i1 [[AND1]], [[C]] Edge: [label [[TMP0:%.*]],label [[IF:%.*]]], RenamedOp: [[AND2]] } +; CHECK-NEXT: [[AND2_0:%.*]] = bitcast i1 [[AND2]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[AND2]] = and i1 [[AND1]], [[C]] Edge: [label [[TMP0]],label [[ELSE:%.*]]], RenamedOp: [[AND2]] } +; CHECK-NEXT: [[AND2_1:%.*]] = bitcast i1 [[AND2]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[AND1]] = and i1 [[A]], [[B]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[AND1]] } +; CHECK-NEXT: [[AND1_0:%.*]] = bitcast i1 [[AND1]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison:i1 [[A]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A]] } +; CHECK-NEXT: [[A_0:%.*]] = bitcast i1 [[A]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison:i1 [[B]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[B]] } +; CHECK-NEXT: [[B_0:%.*]] = bitcast i1 [[B]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison:i1 [[C]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[C]] } +; CHECK-NEXT: [[C_0:%.*]] = bitcast i1 [[C]] to i1 +; CHECK-NEXT: br i1 [[AND2]], label [[IF]], label [[ELSE]] ; CHECK: if: ; CHECK-NEXT: call void @foo(i1 [[A_0]]) ; CHECK-NEXT: call void @foo(i1 [[B_0]]) @@ -438,13 +491,19 @@ define void @test_or_chain(i1 %a, i1 %b, i1 %c) { ; CHECK-LABEL: @test_or_chain( ; CHECK-NEXT: [[OR1:%.*]] = or i1 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[OR2:%.*]] = or i1 [[OR1]], [[C:%.*]] -; CHECK: [[OR2_0:%.*]] = bitcast i1 [[OR2]] to i1 -; CHECK: [[OR2_1:%.*]] = bitcast i1 [[OR2]] to i1 -; CHECK: [[OR1_0:%.*]] = bitcast i1 [[OR1]] to i1 -; CHECK: [[A_0:%.*]] = bitcast i1 [[A]] to i1 -; CHECK: [[B_0:%.*]] = bitcast i1 [[B]] to i1 -; CHECK: [[C_0:%.*]] = bitcast i1 [[C]] to i1 -; CHECK-NEXT: br i1 [[OR2]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[OR2]] = or i1 [[OR1]], [[C]] Edge: [label [[TMP0:%.*]],label [[IF:%.*]]], RenamedOp: [[OR2]] } +; CHECK-NEXT: [[OR2_0:%.*]] = bitcast i1 [[OR2]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[OR2]] = or i1 [[OR1]], 
[[C]] Edge: [label [[TMP0]],label [[ELSE:%.*]]], RenamedOp: [[OR2]] } +; CHECK-NEXT: [[OR2_1:%.*]] = bitcast i1 [[OR2]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[OR1]] = or i1 [[A]], [[B]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[OR1]] } +; CHECK-NEXT: [[OR1_0:%.*]] = bitcast i1 [[OR1]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison:i1 [[A]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A]] } +; CHECK-NEXT: [[A_0:%.*]] = bitcast i1 [[A]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison:i1 [[B]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[B]] } +; CHECK-NEXT: [[B_0:%.*]] = bitcast i1 [[B]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison:i1 [[C]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[C]] } +; CHECK-NEXT: [[C_0:%.*]] = bitcast i1 [[C]] to i1 +; CHECK-NEXT: br i1 [[OR2]], label [[IF]], label [[ELSE]] ; CHECK: if: ; CHECK-NEXT: call void @foo(i1 [[A]]) ; CHECK-NEXT: call void @foo(i1 [[B]]) @@ -485,11 +544,15 @@ define void @test_and_or_mixed(i1 %a, i1 %b, i1 %c) { ; CHECK-LABEL: @test_and_or_mixed( ; CHECK-NEXT: [[OR:%.*]] = or i1 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and i1 [[OR]], [[C:%.*]] -; CHECK: [[AND_0:%.*]] = bitcast i1 [[AND]] to i1 -; CHECK: [[AND_1:%.*]] = bitcast i1 [[AND]] to i1 -; CHECK: [[OR_0:%.*]] = bitcast i1 [[OR]] to i1 -; CHECK: [[C_0:%.*]] = bitcast i1 [[C]] to i1 -; CHECK-NEXT: br i1 [[AND]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[AND]] = and i1 [[OR]], [[C]] Edge: [label [[TMP0:%.*]],label [[IF:%.*]]], RenamedOp: [[AND]] } +; CHECK-NEXT: [[AND_0:%.*]] = bitcast i1 [[AND]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[AND]] = and i1 [[OR]], [[C]] Edge: [label [[TMP0]],label [[ELSE:%.*]]], RenamedOp: [[AND]] } +; CHECK-NEXT: [[AND_1:%.*]] = bitcast i1 [[AND]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[OR]] = or i1 [[A]], [[B]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[OR]] } +; CHECK-NEXT: [[OR_0:%.*]] = bitcast i1 [[OR]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison:i1 [[C]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[C]] } +; CHECK-NEXT: [[C_0:%.*]] = bitcast i1 [[C]] to i1 +; CHECK-NEXT: br i1 [[AND]], label [[IF]], label [[ELSE]] ; CHECK: if: ; CHECK-NEXT: call void @foo(i1 [[A]]) ; CHECK-NEXT: call void @foo(i1 [[B]]) @@ -542,16 +605,25 @@ define void @test_deep_and_chain(i1 %a1) { ; CHECK-NEXT: [[A13:%.*]] = and i1 [[A12]], true ; CHECK-NEXT: [[A14:%.*]] = and i1 [[A13]], true ; CHECK-NEXT: [[A15:%.*]] = and i1 [[A14]], true -; CHECK: [[A15_0:%.*]] = bitcast i1 [[A15]] to i1 -; CHECK: [[A15_1:%.*]] = bitcast i1 [[A15]] to i1 -; CHECK: [[A14_0:%.*]] = bitcast i1 [[A14]] to i1 -; CHECK: [[A13_0:%.*]] = bitcast i1 [[A13]] to i1 -; CHECK: [[A12_0:%.*]] = bitcast i1 [[A12]] to i1 -; CHECK: [[A11_0:%.*]] = bitcast i1 [[A11]] to i1 -; CHECK: [[A10_0:%.*]] = bitcast i1 [[A10]] to i1 -; CHECK: [[A9_0:%.*]] = bitcast i1 [[A9]] to i1 -; CHECK: [[A8_0:%.*]] = bitcast i1 [[A8]] to i1 -; CHECK-NEXT: br i1 [[A15]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A15]] = and i1 [[A14]], true Edge: [label [[TMP0:%.*]],label [[IF:%.*]]], RenamedOp: [[A15]] } +; CHECK-NEXT: [[A15_0:%.*]] = bitcast i1 [[A15]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A15]] = and i1 [[A14]], true Edge: [label 
[[TMP0]],label [[ELSE:%.*]]], RenamedOp: [[A15]] } +; CHECK-NEXT: [[A15_1:%.*]] = bitcast i1 [[A15]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A14]] = and i1 [[A13]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A14]] } +; CHECK-NEXT: [[A14_0:%.*]] = bitcast i1 [[A14]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A13]] = and i1 [[A12]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A13]] } +; CHECK-NEXT: [[A13_0:%.*]] = bitcast i1 [[A13]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A12]] = and i1 [[A11]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A12]] } +; CHECK-NEXT: [[A12_0:%.*]] = bitcast i1 [[A12]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A11]] = and i1 [[A10]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A11]] } +; CHECK-NEXT: [[A11_0:%.*]] = bitcast i1 [[A11]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A10]] = and i1 [[A9]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A10]] } +; CHECK-NEXT: [[A10_0:%.*]] = bitcast i1 [[A10]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A9]] = and i1 [[A8]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A9]] } +; CHECK-NEXT: [[A9_0:%.*]] = bitcast i1 [[A9]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A8]] = and i1 [[A7]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A8]] } +; CHECK-NEXT: [[A8_0:%.*]] = bitcast i1 [[A8]] to i1 +; CHECK-NEXT: br i1 [[A15]], label [[IF]], label [[ELSE]] ; CHECK: if: ; CHECK-NEXT: call void @foo(i1 [[A1]]) ; CHECK-NEXT: call void @foo(i1 [[A2]]) @@ -656,16 +728,25 @@ define void @test_deep_and_tree(i1 %a1) { ; CHECK-NEXT: [[A13:%.*]] = and i1 [[A12]], [[A12]] ; CHECK-NEXT: [[A14:%.*]] = and i1 [[A13]], [[A13]] ; CHECK-NEXT: [[A15:%.*]] = and i1 [[A14]], [[A14]] -; CHECK: [[A15_0:%.*]] = bitcast i1 [[A15]] to i1 -; CHECK: [[A15_1:%.*]] = bitcast i1 [[A15]] to i1 -; CHECK: [[A14_0:%.*]] = bitcast i1 [[A14]] to i1 -; CHECK: [[A13_0:%.*]] = bitcast i1 [[A13]] to i1 -; CHECK: [[A12_0:%.*]] = bitcast i1 [[A12]] to i1 -; CHECK: [[A11_0:%.*]] = bitcast i1 [[A11]] to i1 -; CHECK: [[A10_0:%.*]] = bitcast i1 [[A10]] to i1 -; CHECK: [[A9_0:%.*]] = bitcast i1 [[A9]] to i1 -; CHECK: [[A8_0:%.*]] = bitcast i1 [[A8]] to i1 -; CHECK-NEXT: br i1 [[A15]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A15]] = and i1 [[A14]], [[A14]] Edge: [label [[TMP0:%.*]],label [[IF:%.*]]], RenamedOp: [[A15]] } +; CHECK-NEXT: [[A15_0:%.*]] = bitcast i1 [[A15]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A15]] = and i1 [[A14]], [[A14]] Edge: [label [[TMP0]],label [[ELSE:%.*]]], RenamedOp: [[A15]] } +; CHECK-NEXT: [[A15_1:%.*]] = bitcast i1 [[A15]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A14]] = and i1 [[A13]], [[A13]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A14]] } +; CHECK-NEXT: [[A14_0:%.*]] = bitcast i1 [[A14]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A13]] = and i1 [[A12]], [[A12]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A13]] } +; CHECK-NEXT: [[A13_0:%.*]] = bitcast i1 [[A13]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A12]] = and i1 [[A11]], [[A11]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A12]] } +; CHECK-NEXT: [[A12_0:%.*]] = bitcast i1 [[A12]] to i1 +; CHECK-NEXT: 
; branch predicate info { TrueEdge: 1 Comparison: [[A11]] = and i1 [[A10]], [[A10]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A11]] } +; CHECK-NEXT: [[A11_0:%.*]] = bitcast i1 [[A11]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A10]] = and i1 [[A9]], [[A9]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A10]] } +; CHECK-NEXT: [[A10_0:%.*]] = bitcast i1 [[A10]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A9]] = and i1 [[A8]], [[A8]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A9]] } +; CHECK-NEXT: [[A9_0:%.*]] = bitcast i1 [[A9]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A8]] = and i1 [[A7]], [[A7]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A8]] } +; CHECK-NEXT: [[A8_0:%.*]] = bitcast i1 [[A8]] to i1 +; CHECK-NEXT: br i1 [[A15]], label [[IF]], label [[ELSE]] ; CHECK: if: ; CHECK-NEXT: call void @foo(i1 [[A1]]) ; CHECK-NEXT: call void @foo(i1 [[A2]]) @@ -770,16 +851,25 @@ define void @test_deep_or_tree(i1 %a1) { ; CHECK-NEXT: [[A13:%.*]] = or i1 [[A12]], [[A12]] ; CHECK-NEXT: [[A14:%.*]] = or i1 [[A13]], [[A13]] ; CHECK-NEXT: [[A15:%.*]] = or i1 [[A14]], [[A14]] -; CHECK: [[A15_0:%.*]] = bitcast i1 [[A15]] to i1 -; CHECK: [[A15_1:%.*]] = bitcast i1 [[A15]] to i1 -; CHECK: [[A14_0:%.*]] = bitcast i1 [[A14]] to i1 -; CHECK: [[A13_0:%.*]] = bitcast i1 [[A13]] to i1 -; CHECK: [[A12_0:%.*]] = bitcast i1 [[A12]] to i1 -; CHECK: [[A11_0:%.*]] = bitcast i1 [[A11]] to i1 -; CHECK: [[A10_0:%.*]] = bitcast i1 [[A10]] to i1 -; CHECK: [[A9_0:%.*]] = bitcast i1 [[A9]] to i1 -; CHECK: [[A8_0:%.*]] = bitcast i1 [[A8]] to i1 -; CHECK-NEXT: br i1 [[A15]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A15]] = or i1 [[A14]], [[A14]] Edge: [label [[TMP0:%.*]],label [[IF:%.*]]], RenamedOp: [[A15]] } +; CHECK-NEXT: [[A15_0:%.*]] = bitcast i1 [[A15]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A15]] = or i1 [[A14]], [[A14]] Edge: [label [[TMP0]],label [[ELSE:%.*]]], RenamedOp: [[A15]] } +; CHECK-NEXT: [[A15_1:%.*]] = bitcast i1 [[A15]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A14]] = or i1 [[A13]], [[A13]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A14]] } +; CHECK-NEXT: [[A14_0:%.*]] = bitcast i1 [[A14]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A13]] = or i1 [[A12]], [[A12]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A13]] } +; CHECK-NEXT: [[A13_0:%.*]] = bitcast i1 [[A13]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A12]] = or i1 [[A11]], [[A11]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A12]] } +; CHECK-NEXT: [[A12_0:%.*]] = bitcast i1 [[A12]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A11]] = or i1 [[A10]], [[A10]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A11]] } +; CHECK-NEXT: [[A11_0:%.*]] = bitcast i1 [[A11]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A10]] = or i1 [[A9]], [[A9]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A10]] } +; CHECK-NEXT: [[A10_0:%.*]] = bitcast i1 [[A10]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A9]] = or i1 [[A8]], [[A8]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A9]] } +; CHECK-NEXT: [[A9_0:%.*]] = bitcast i1 [[A9]] to i1 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A8]] = or i1 [[A7]], [[A7]] Edge: [label [[TMP0]],label 
[[ELSE]]], RenamedOp: [[A8]] } +; CHECK-NEXT: [[A8_0:%.*]] = bitcast i1 [[A8]] to i1 +; CHECK-NEXT: br i1 [[A15]], label [[IF]], label [[ELSE]] ; CHECK: if: ; CHECK-NEXT: call void @foo(i1 [[A1]]) ; CHECK-NEXT: call void @foo(i1 [[A2]]) @@ -873,11 +963,16 @@ define void @test_assume_and_chain(i1 %a, i1 %b, i1 %c) { ; CHECK-NEXT: [[AND1:%.*]] = and i1 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C:%.*]] ; CHECK-NEXT: call void @llvm.assume(i1 [[AND2]]) -; CHECK: [[TMP1:%.*]] = bitcast i1 [[C]] to i1 -; CHECK: [[TMP2:%.*]] = bitcast i1 [[B]] to i1 -; CHECK: [[TMP3:%.*]] = bitcast i1 [[A]] to i1 -; CHECK: [[TMP4:%.*]] = bitcast i1 [[AND1]] to i1 -; CHECK: [[TMP5:%.*]] = bitcast i1 [[AND2]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison:i1 [[C]], RenamedOp: [[C]] } +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[C]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison:i1 [[B]], RenamedOp: [[B]] } +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[B]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison:i1 [[A]], RenamedOp: [[A]] } +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i1 [[A]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison: [[AND1]] = and i1 [[A]], [[B]], RenamedOp: [[AND1]] } +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i1 [[AND1]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison: [[AND2]] = and i1 [[AND1]], [[C]], RenamedOp: [[AND2]] } +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i1 [[AND2]] to i1 ; CHECK-NEXT: call void @foo(i1 [[TMP3]]) ; CHECK-NEXT: call void @foo(i1 [[TMP2]]) ; CHECK-NEXT: call void @foo(i1 [[TMP1]]) @@ -901,7 +996,8 @@ define void @test_assume_or_chain(i1 %a, i1 %b, i1 %c) { ; CHECK-NEXT: [[OR1:%.*]] = or i1 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[OR2:%.*]] = or i1 [[OR1]], [[C:%.*]] ; CHECK-NEXT: call void @llvm.assume(i1 [[OR2]]) -; CHECK: [[TMP1:%.*]] = bitcast i1 [[OR2]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison: [[OR2]] = or i1 [[OR1]], [[C]], RenamedOp: [[OR2]] } +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[OR2]] to i1 ; CHECK-NEXT: call void @foo(i1 [[A]]) ; CHECK-NEXT: call void @foo(i1 [[B]]) ; CHECK-NEXT: call void @foo(i1 [[C]]) @@ -937,14 +1033,22 @@ define void @test_assume_deep_and_tree(i1 %a1) { ; CHECK-NEXT: [[A14:%.*]] = and i1 [[A13]], [[A13]] ; CHECK-NEXT: [[A15:%.*]] = and i1 [[A14]], [[A14]] ; CHECK-NEXT: call void @llvm.assume(i1 [[A15]]) -; CHECK: [[TMP1:%.*]] = bitcast i1 [[A8]] to i1 -; CHECK: [[TMP2:%.*]] = bitcast i1 [[A9]] to i1 -; CHECK: [[TMP3:%.*]] = bitcast i1 [[A10]] to i1 -; CHECK: [[TMP4:%.*]] = bitcast i1 [[A11]] to i1 -; CHECK: [[TMP5:%.*]] = bitcast i1 [[A12]] to i1 -; CHECK: [[TMP6:%.*]] = bitcast i1 [[A13]] to i1 -; CHECK: [[TMP7:%.*]] = bitcast i1 [[A14]] to i1 -; CHECK: [[TMP8:%.*]] = bitcast i1 [[A15]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison: [[A8]] = and i1 [[A7]], [[A7]], RenamedOp: [[A8]] } +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[A8]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison: [[A9]] = and i1 [[A8]], [[A8]], RenamedOp: [[A9]] } +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[A9]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison: [[A10]] = and i1 [[A9]], [[A9]], RenamedOp: [[A10]] } +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i1 [[A10]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison: [[A11]] = and i1 [[A10]], [[A10]], RenamedOp: [[A11]] } +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i1 [[A11]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison: [[A12]] = and i1 [[A11]], [[A11]], RenamedOp: [[A12]] } +; CHECK-NEXT: [[TMP5:%.*]] = 
bitcast i1 [[A12]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison: [[A13]] = and i1 [[A12]], [[A12]], RenamedOp: [[A13]] } +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i1 [[A13]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison: [[A14]] = and i1 [[A13]], [[A13]], RenamedOp: [[A14]] } +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i1 [[A14]] to i1 +; CHECK-NEXT: ; assume predicate info { Comparison: [[A15]] = and i1 [[A14]], [[A14]], RenamedOp: [[A15]] } +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i1 [[A15]] to i1 ; CHECK-NEXT: call void @foo(i1 [[A1]]) ; CHECK-NEXT: call void @foo(i1 [[A2]]) ; CHECK-NEXT: call void @foo(i1 [[A3]]) @@ -1001,13 +1105,15 @@ define i32 @test_and_with_phinode(i32 %x) { ; CHECK-NEXT: [[XGE1:%.*]] = icmp uge i32 [[X:%.*]], 1 ; CHECK-NEXT: [[XLT2:%.*]] = icmp ult i32 [[X]], 2 ; CHECK-NEXT: [[AND:%.*]] = and i1 [[XGE1]], [[XLT2]] -; CHECK: [[X_0_1:%.*]] = bitcast i32 [[X]] to i32 -; CHECK: [[X_0_2:%.*]] = bitcast i32 [[X_0_1]] to i32 -; CHECK-NEXT: br i1 [[AND]], label [[PHI:%.*]], label [[NOPE:%.*]] +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XGE1]] = icmp uge i32 [[X]], 1 Edge: [label [[ENTRY:%.*]],label [[PHI:%.*]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0_1:%.*]] = bitcast i32 [[X]] to i32 +; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XLT2]] = icmp ult i32 [[X]], 2 Edge: [label [[ENTRY]],label [[PHI]]], RenamedOp: [[X]] } +; CHECK-NEXT: [[X_0_2:%.*]] = bitcast i32 [[X_0_1]] to i32 +; CHECK-NEXT: br i1 [[AND]], label [[PHI]], label [[NOPE:%.*]] ; CHECK: nope: ; CHECK-NEXT: br label [[PHI]] ; CHECK: phi: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[X_0_2]], [[ENTRY:%.*]] ], [ 1, [[NOPE]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[X_0_2]], [[ENTRY]] ], [ 1, [[NOPE]] ] ; CHECK-NEXT: ret i32 [[RES]] ; entry: diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll new file mode 100644 index 0000000..bfd216d --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -S | FileCheck %s + +; Test whether UTC checks empty lines instead of skipping them. +define i32 @test(i32 %x) { +entry: + br label %block1 + +block1: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %block2, label %exit1 + +block2: + br i1 %cmp, label %block3, label %exit2 + +block3: + br i1 %cmp, label %exit3, label %exit4 + +exit1: + ret i32 0 + +exit2: + ret i32 %x + +exit3: + ret i32 %x + +exit4: + ret i32 %x +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll.expected new file mode 100644 index 0000000..c5f822d --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll.expected @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 7 +; RUN: opt < %s -S | FileCheck %s + +; Test whether UTC checks empty lines instead of skipping them. 
+define i32 @test(i32 %x) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[BLOCK1:.*]] +; CHECK-EMPTY: +; CHECK-NEXT: [[BLOCK1]]: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[EXIT1:.*]] +; CHECK-EMPTY: +; CHECK-NEXT: [[BLOCK2]]: +; CHECK-NEXT: br i1 [[CMP]], label %[[BLOCK3:.*]], label %[[EXIT2:.*]] +; CHECK-EMPTY: +; CHECK-NEXT: [[BLOCK3]]: +; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT3:.*]], label %[[EXIT4:.*]] +; CHECK-EMPTY: +; CHECK-NEXT: [[EXIT1]]: +; CHECK-NEXT: ret i32 0 +; CHECK-EMPTY: +; CHECK-NEXT: [[EXIT2]]: +; CHECK-NEXT: ret i32 [[X]] +; CHECK-EMPTY: +; CHECK-NEXT: [[EXIT3]]: +; CHECK-NEXT: ret i32 [[X]] +; CHECK-EMPTY: +; CHECK-NEXT: [[EXIT4]]: +; CHECK-NEXT: ret i32 [[X]] +; +entry: + br label %block1 + +block1: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %block2, label %exit1 + +block2: + br i1 %cmp, label %block3, label %exit2 + +block3: + br i1 %cmp, label %exit3, label %exit4 + +exit1: + ret i32 0 + +exit2: + ret i32 %x + +exit3: + ret i32 %x + +exit4: + ret i32 %x +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected index b1977e7..8cab0bb 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected @@ -12,13 +12,17 @@ define i8 @testi8(i8 %x) { ; CHECK-NEXT: i8 2, label %[[CASE3:.*]] ; CHECK-NEXT: i8 3, label %[[CASE3]] ; CHECK-NEXT: ] -; CHECK: [[DEFAULT]]: +; CHECK-EMPTY: +; CHECK-NEXT: [[DEFAULT]]: ; CHECK-NEXT: ret i8 0 -; CHECK: [[CASE1]]: +; CHECK-EMPTY: +; CHECK-NEXT: [[CASE1]]: ; CHECK-NEXT: ret i8 1 -; CHECK: [[CASE2]]: +; CHECK-EMPTY: +; CHECK-NEXT: [[CASE2]]: ; CHECK-NEXT: ret i8 2 -; CHECK: [[CASE3]]: +; CHECK-EMPTY: +; CHECK-NEXT: [[CASE3]]: ; CHECK-NEXT: ret i8 3 ; switch i8 %x, label %default [ @@ -46,13 +50,17 @@ define i32 @testi32(i32 %x) { ; CHECK-NEXT: i32 2, label %[[CASE3:.*]] ; CHECK-NEXT: i32 3, label %[[CASE3]] ; CHECK-NEXT: ] -; CHECK: [[DEFAULT]]: +; CHECK-EMPTY: +; CHECK-NEXT: [[DEFAULT]]: ; CHECK-NEXT: ret i32 0 -; CHECK: [[CASE1]]: +; CHECK-EMPTY: +; CHECK-NEXT: [[CASE1]]: ; CHECK-NEXT: ret i32 1 -; CHECK: [[CASE2]]: +; CHECK-EMPTY: +; CHECK-NEXT: [[CASE2]]: ; CHECK-NEXT: ret i32 2 -; CHECK: [[CASE3]]: +; CHECK-EMPTY: +; CHECK-NEXT: [[CASE3]]: ; CHECK-NEXT: ret i32 3 ; switch i32 %x, label %default [ @@ -80,13 +88,17 @@ define i128 @testi128(i128 %x) { ; CHECK-NEXT: i128 2, label %[[CASE3:.*]] ; CHECK-NEXT: i128 3, label %[[CASE3]] ; CHECK-NEXT: ] -; CHECK: [[DEFAULT]]: +; CHECK-EMPTY: +; CHECK-NEXT: [[DEFAULT]]: ; CHECK-NEXT: ret i128 0 -; CHECK: [[CASE1]]: +; CHECK-EMPTY: +; CHECK-NEXT: [[CASE1]]: ; CHECK-NEXT: ret i128 1 -; CHECK: [[CASE2]]: +; CHECK-EMPTY: +; CHECK-NEXT: [[CASE2]]: ; CHECK-NEXT: ret i128 2 -; CHECK: [[CASE3]]: +; CHECK-EMPTY: +; CHECK-NEXT: [[CASE3]]: ; CHECK-NEXT: ret i128 3 ; switch i128 %x, label %default [ diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/check_empty.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/check_empty.test new file mode 100644 index 0000000..670bda2 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/check_empty.test @@ -0,0 +1,3 @@ +## test whether the UTC generates CHECK-EMPTY for blank lines +# RUN: cp -f %S/Inputs/check_empty.ll %t.ll && 
%update_test_checks %t.ll --version 7 +# RUN: diff -u %t.ll %S/Inputs/check_empty.ll.expected diff --git a/llvm/test/tools/llvm-config/paths.test b/llvm/test/tools/llvm-config/paths.test index 419f155..61d86f7 100644 --- a/llvm/test/tools/llvm-config/paths.test +++ b/llvm/test/tools/llvm-config/paths.test @@ -4,18 +4,34 @@ RUN: llvm-config --bindir 2>&1 | FileCheck --check-prefix=CHECK-BINDIR %s CHECK-BINDIR: {{.*}}{{/|\\}}bin CHECK-BINDIR-NOT: error: CHECK-BINDIR-NOT: warning +RUN: llvm-config --bindir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-BINDIR2 %s +CHECK-BINDIR2: {{.*}}{{/|\\\\}}bin +CHECK-BINDIR2-NOT: error: +CHECK-BINDIR2-NOT: warning RUN: llvm-config --includedir 2>&1 | FileCheck --check-prefix=CHECK-INCLUDEDIR %s CHECK-INCLUDEDIR: {{.*}}{{/|\\}}include CHECK-INCLUDEDIR-NOT: error: CHECK-INCLUDEDIR-NOT: warning +RUN: llvm-config --includedir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-INCLUDEDIR2 %s +CHECK-INCLUDEDIR2: {{.*}}{{/|\\\\}}include +CHECK-INCLUDEDIR2-NOT: error: +CHECK-INCLUDEDIR2-NOT: warning RUN: llvm-config --libdir 2>&1 | FileCheck --check-prefix=CHECK-LIBDIR %s CHECK-LIBDIR: {{.*}}{{/|\\}}lib{{.*}} CHECK-LIBDIR-NOT: error: CHECK-LIBDIR-NOT: warning +RUN: llvm-config --libdir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-LIBDIR2 %s +CHECK-LIBDIR2: {{.*}}{{/|\\\\}}lib{{.*}} +CHECK-LIBDIR2-NOT: error: +CHECK-LIBDIR2-NOT: warning RUN: llvm-config --cmakedir 2>&1 | FileCheck --check-prefix=CHECK-CMAKEDIR %s CHECK-CMAKEDIR: {{.*}}{{/|\\}}cmake{{/|\\}}llvm CHECK-CMAKEDIR-NOT: error: CHECK-CMAKEDIR-NOT: warning +RUN: llvm-config --cmakedir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-CMAKEDIR2 %s +CHECK-CMAKEDIR2: {{.*}}{{/|\\\\}}cmake{{/|\\\\}}llvm +CHECK-CMAKEDIR2-NOT: error: +CHECK-CMAKEDIR2-NOT: warning diff --git a/llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt b/llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt index dfbac4c..141a56a 100644 --- a/llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt +++ b/llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt @@ -1,33 +1,33 @@ MAX_RELATION=4 -187 7072 1 -187 6968 2 +187 7051 1 +187 6948 2 187 187 0 -187 7072 1 -187 6969 2 +187 7051 1 +187 6949 2 187 10 0 -10 7072 1 -10 7072 2 -10 7072 3 -10 6961 4 +10 7051 1 +10 7051 2 +10 7051 3 +10 6941 4 10 187 0 -187 6952 1 -187 7072 2 -187 1555 0 -1555 6882 1 -1555 6952 2 -187 7072 1 -187 6968 2 +187 6932 1 +187 7051 2 +187 1543 0 +1543 6862 1 +1543 6932 2 +187 7051 1 +187 6948 2 187 187 0 -187 7072 1 -187 6969 2 +187 7051 1 +187 6949 2 187 601 0 -601 7072 1 -601 7072 2 -601 7072 3 -601 6961 4 +601 7051 1 +601 7051 2 +601 7051 3 +601 6941 4 601 187 0 -187 6952 1 -187 7072 2 -187 1555 0 -1555 6882 1 -1555 6952 2 +187 6932 1 +187 7051 2 +187 1543 0 +1543 6862 1 +1543 6932 2 diff --git a/llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt b/llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt index dc436d1..dbbbbc7 100644 --- a/llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt +++ b/llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt @@ -1,4 +1,4 @@ -7173 +7151 AAA 0 AAD 1 AADD 2 @@ -1440,5735 +1440,5713 @@ PSUBWrm 1437 PSUBWrr 1438 PSWAPDrm 1439 PSWAPDrr 1440 -PT 1441 -PTCMMIMFP 1442 -PTCMMRLFP 1443 -PTCONJTCMMIMFP 1444 -PTCONJTFP 1445 -PTCVTROWD 1446 -PTCVTROWPS 1447 -PTDPBF 1448 -PTDPBHF 1449 -PTDPBSSD 1450 -PTDPBSSDV 1451 -PTDPBSUD 1452 -PTDPBSUDV 1453 -PTDPBUSD 1454 -PTDPBUSDV 1455 -PTDPBUUD 1456 -PTDPBUUDV 1457 -PTDPFP 1458 -PTDPHBF 1459 -PTDPHF 1460 -PTESTrm 1461 -PTESTrr 1462 
-PTILELOADD 1463 -PTILELOADDRS 1464 -PTILELOADDRST 1465 -PTILELOADDRSV 1466 -PTILELOADDT 1467 -PTILELOADDV 1468 -PTILEMOVROWrre 1469 -PTILEMOVROWrreV 1470 -PTILEMOVROWrri 1471 -PTILEMOVROWrriV 1472 -PTILEPAIRLOAD 1473 -PTILEPAIRSTORE 1474 -PTILESTORED 1475 -PTILESTOREDV 1476 -PTILEZERO 1477 -PTILEZEROV 1478 -PTMMULTF 1479 -PTTCMMIMFP 1480 -PTTCMMRLFP 1481 -PTTDPBF 1482 -PTTDPFP 1483 -PTTMMULTF 1484 -PTTRANSPOSED 1485 -PTTRANSPOSEDV 1486 -PTWRITE 1487 -PTWRITEm 1488 -PTWRITEr 1489 -PUNPCKHBWrm 1490 -PUNPCKHBWrr 1491 -PUNPCKHDQrm 1492 -PUNPCKHDQrr 1493 -PUNPCKHQDQrm 1494 -PUNPCKHQDQrr 1495 -PUNPCKHWDrm 1496 -PUNPCKHWDrr 1497 -PUNPCKLBWrm 1498 -PUNPCKLBWrr 1499 -PUNPCKLDQrm 1500 -PUNPCKLDQrr 1501 -PUNPCKLQDQrm 1502 -PUNPCKLQDQrr 1503 -PUNPCKLWDrm 1504 -PUNPCKLWDrr 1505 -PUSH 1506 -PUSHA 1507 -PUSHCS 1508 -PUSHDS 1509 -PUSHES 1510 -PUSHF 1511 -PUSHFS 1512 -PUSHGS 1513 -PUSHP 1514 -PUSHSS 1515 -PVALIDATE 1516 -PXORrm 1517 -PXORrr 1518 -RCL 1519 -RCPPSm 1520 -RCPPSr 1521 -RCPSSm 1522 -RCPSSm_Int 1523 -RCPSSr 1524 -RCPSSr_Int 1525 -RCR 1526 -RDFLAGS 1527 -RDFSBASE 1528 -RDGSBASE 1529 -RDMSR 1530 -RDMSRLIST 1531 -RDMSRri 1532 -RDMSRri_EVEX 1533 -RDPID 1534 -RDPKRUr 1535 -RDPMC 1536 -RDPRU 1537 -RDRAND 1538 -RDSEED 1539 -RDSSPD 1540 -RDSSPQ 1541 -RDTSC 1542 -RDTSCP 1543 -REG_SEQUENCE 1544 -REPNE_PREFIX 1545 -REP_MOVSB 1546 -REP_MOVSD 1547 -REP_MOVSQ 1548 -REP_MOVSW 1549 -REP_PREFIX 1550 -REP_STOSB 1551 -REP_STOSD 1552 -REP_STOSQ 1553 -REP_STOSW 1554 -RET 1555 -RETI 1556 -REX 1557 -RMPADJUST 1558 -RMPQUERY 1559 -RMPUPDATE 1560 -ROL 1561 -ROR 1562 -RORX 1563 -ROUNDPDmi 1564 -ROUNDPDri 1565 -ROUNDPSmi 1566 -ROUNDPSri 1567 -ROUNDSDmi 1568 -ROUNDSDmi_Int 1569 -ROUNDSDri 1570 -ROUNDSDri_Int 1571 -ROUNDSSmi 1572 -ROUNDSSmi_Int 1573 -ROUNDSSri 1574 -ROUNDSSri_Int 1575 -RSM 1576 -RSQRTPSm 1577 -RSQRTPSr 1578 -RSQRTSSm 1579 -RSQRTSSm_Int 1580 -RSQRTSSr 1581 -RSQRTSSr_Int 1582 -RSTORSSP 1583 -SAHF 1584 -SALC 1585 -SAR 1586 -SARX 1587 -SAVEPREVSSP 1588 -SBB 1589 -SCASB 1590 -SCASL 1591 -SCASQ 1592 -SCASW 1593 -SEAMCALL 1594 -SEAMOPS 1595 -SEAMRET 1596 -SEG_ALLOCA 1597 -SEH_BeginEpilogue 1598 -SEH_EndEpilogue 1599 -SEH_EndPrologue 1600 -SEH_PushFrame 1601 -SEH_PushReg 1602 -SEH_SaveReg 1603 -SEH_SaveXMM 1604 -SEH_SetFrame 1605 -SEH_StackAlign 1606 -SEH_StackAlloc 1607 -SEH_UnwindV 1608 -SEH_UnwindVersion 1609 -SENDUIPI 1610 -SERIALIZE 1611 -SETB_C 1612 -SETCCm 1613 -SETCCm_EVEX 1614 -SETCCr 1615 -SETCCr_EVEX 1616 -SETSSBSY 1617 -SETZUCCm 1618 -SETZUCCr 1619 -SFENCE 1620 -SGDT 1621 -SHA 1622 -SHL 1623 -SHLD 1624 -SHLDROT 1625 -SHLX 1626 -SHR 1627 -SHRD 1628 -SHRDROT 1629 -SHRX 1630 -SHUFPDrmi 1631 -SHUFPDrri 1632 -SHUFPSrmi 1633 -SHUFPSrri 1634 -SIDT 1635 -SKINIT 1636 -SLDT 1637 -SLWPCB 1638 -SMSW 1639 -SQRTPDm 1640 -SQRTPDr 1641 -SQRTPSm 1642 -SQRTPSr 1643 -SQRTSDm 1644 -SQRTSDm_Int 1645 -SQRTSDr 1646 -SQRTSDr_Int 1647 -SQRTSSm 1648 -SQRTSSm_Int 1649 -SQRTSSr 1650 -SQRTSSr_Int 1651 -SQRT_F 1652 -SQRT_Fp 1653 -SS_PREFIX 1654 -STAC 1655 -STACKALLOC_W_PROBING 1656 -STACKMAP 1657 -STATEPOINT 1658 -STC 1659 -STD 1660 -STGI 1661 -STI 1662 -STMXCSR 1663 -STOSB 1664 -STOSL 1665 -STOSQ 1666 -STOSW 1667 -STR 1668 -STRm 1669 -STTILECFG 1670 -STTILECFG_EVEX 1671 -STUI 1672 -ST_F 1673 -ST_FP 1674 -ST_FPrr 1675 -ST_Fp 1676 -ST_FpP 1677 -ST_Frr 1678 -SUB 1679 -SUBPDrm 1680 -SUBPDrr 1681 -SUBPSrm 1682 -SUBPSrr 1683 -SUBREG_TO_REG 1684 -SUBR_F 1685 -SUBR_FI 1686 -SUBR_FPrST 1687 -SUBR_FST 1688 -SUBR_Fp 1689 -SUBR_FpI 1690 -SUBR_FrST 1691 -SUBSDrm 1692 -SUBSDrm_Int 1693 -SUBSDrr 1694 -SUBSDrr_Int 1695 -SUBSSrm 1696 -SUBSSrm_Int 
1697 -SUBSSrr 1698 -SUBSSrr_Int 1699 -SUB_F 1700 -SUB_FI 1701 -SUB_FPrST 1702 -SUB_FST 1703 -SUB_Fp 1704 -SUB_FpI 1705 -SUB_FrST 1706 -SWAPGS 1707 -SYSCALL 1708 -SYSENTER 1709 -SYSEXIT 1710 -SYSRET 1711 -T 1712 -TAILJMPd 1713 -TAILJMPd_CC 1714 -TAILJMPm 1715 -TAILJMPr 1716 -TCMMIMFP 1717 -TCMMRLFP 1718 -TCONJTCMMIMFP 1719 -TCONJTFP 1720 -TCRETURN_HIPE 1721 -TCRETURN_WIN 1722 -TCRETURN_WINmi 1723 -TCRETURNdi 1724 -TCRETURNdicc 1725 -TCRETURNmi 1726 -TCRETURNri 1727 -TCVTROWD 1728 -TCVTROWPS 1729 -TDCALL 1730 -TDPBF 1731 -TDPBHF 1732 -TDPBSSD 1733 -TDPBSUD 1734 -TDPBUSD 1735 -TDPBUUD 1736 -TDPFP 1737 -TDPHBF 1738 -TDPHF 1739 -TEST 1740 -TESTUI 1741 -TILELOADD 1742 -TILELOADDRS 1743 -TILELOADDRST 1744 -TILELOADDRS_EVEX 1745 -TILELOADDT 1746 -TILELOADD_EVEX 1747 -TILEMOVROWrre 1748 -TILEMOVROWrri 1749 -TILERELEASE 1750 -TILESTORED 1751 -TILESTORED_EVEX 1752 -TILEZERO 1753 -TLBSYNC 1754 -TLSCall 1755 -TLS_addr 1756 -TLS_addrX 1757 -TLS_base_addr 1758 -TLS_base_addrX 1759 -TLS_desc 1760 -TMMULTF 1761 -TPAUSE 1762 -TRAP 1763 -TST_F 1764 -TST_Fp 1765 -TTCMMIMFP 1766 -TTCMMRLFP 1767 -TTDPBF 1768 -TTDPFP 1769 -TTMMULTF 1770 -TTRANSPOSED 1771 -TZCNT 1772 -TZMSK 1773 -UBSAN_UD 1774 -UCOMISDrm 1775 -UCOMISDrm_Int 1776 -UCOMISDrr 1777 -UCOMISDrr_Int 1778 -UCOMISSrm 1779 -UCOMISSrm_Int 1780 -UCOMISSrr 1781 -UCOMISSrr_Int 1782 -UCOM_FIPr 1783 -UCOM_FIr 1784 -UCOM_FPPr 1785 -UCOM_FPr 1786 -UCOM_FpIr 1787 -UCOM_Fpr 1788 -UCOM_Fr 1789 -UD 1790 -UIRET 1791 -UMONITOR 1792 -UMWAIT 1793 -UNPCKHPDrm 1794 -UNPCKHPDrr 1795 -UNPCKHPSrm 1796 -UNPCKHPSrr 1797 -UNPCKLPDrm 1798 -UNPCKLPDrr 1799 -UNPCKLPSrm 1800 -UNPCKLPSrr 1801 -URDMSRri 1802 -URDMSRri_EVEX 1803 -URDMSRrr 1804 -URDMSRrr_EVEX 1805 -UWRMSRir 1806 -UWRMSRir_EVEX 1807 -UWRMSRrr 1808 -UWRMSRrr_EVEX 1809 -V 1810 -VAARG 1811 -VAARG_X 1812 -VADDBF 1813 -VADDPDYrm 1814 -VADDPDYrr 1815 -VADDPDZ 1816 -VADDPDZrm 1817 -VADDPDZrmb 1818 -VADDPDZrmbk 1819 -VADDPDZrmbkz 1820 -VADDPDZrmk 1821 -VADDPDZrmkz 1822 -VADDPDZrr 1823 -VADDPDZrrb 1824 -VADDPDZrrbk 1825 -VADDPDZrrbkz 1826 -VADDPDZrrk 1827 -VADDPDZrrkz 1828 -VADDPDrm 1829 -VADDPDrr 1830 -VADDPHZ 1831 -VADDPHZrm 1832 -VADDPHZrmb 1833 -VADDPHZrmbk 1834 -VADDPHZrmbkz 1835 -VADDPHZrmk 1836 -VADDPHZrmkz 1837 -VADDPHZrr 1838 -VADDPHZrrb 1839 -VADDPHZrrbk 1840 -VADDPHZrrbkz 1841 -VADDPHZrrk 1842 -VADDPHZrrkz 1843 -VADDPSYrm 1844 -VADDPSYrr 1845 -VADDPSZ 1846 -VADDPSZrm 1847 -VADDPSZrmb 1848 -VADDPSZrmbk 1849 -VADDPSZrmbkz 1850 -VADDPSZrmk 1851 -VADDPSZrmkz 1852 -VADDPSZrr 1853 -VADDPSZrrb 1854 -VADDPSZrrbk 1855 -VADDPSZrrbkz 1856 -VADDPSZrrk 1857 -VADDPSZrrkz 1858 -VADDPSrm 1859 -VADDPSrr 1860 -VADDSDZrm 1861 -VADDSDZrm_Int 1862 -VADDSDZrmk_Int 1863 -VADDSDZrmkz_Int 1864 -VADDSDZrr 1865 -VADDSDZrr_Int 1866 -VADDSDZrrb_Int 1867 -VADDSDZrrbk_Int 1868 -VADDSDZrrbkz_Int 1869 -VADDSDZrrk_Int 1870 -VADDSDZrrkz_Int 1871 -VADDSDrm 1872 -VADDSDrm_Int 1873 -VADDSDrr 1874 -VADDSDrr_Int 1875 -VADDSHZrm 1876 -VADDSHZrm_Int 1877 -VADDSHZrmk_Int 1878 -VADDSHZrmkz_Int 1879 -VADDSHZrr 1880 -VADDSHZrr_Int 1881 -VADDSHZrrb_Int 1882 -VADDSHZrrbk_Int 1883 -VADDSHZrrbkz_Int 1884 -VADDSHZrrk_Int 1885 -VADDSHZrrkz_Int 1886 -VADDSSZrm 1887 -VADDSSZrm_Int 1888 -VADDSSZrmk_Int 1889 -VADDSSZrmkz_Int 1890 -VADDSSZrr 1891 -VADDSSZrr_Int 1892 -VADDSSZrrb_Int 1893 -VADDSSZrrbk_Int 1894 -VADDSSZrrbkz_Int 1895 -VADDSSZrrk_Int 1896 -VADDSSZrrkz_Int 1897 -VADDSSrm 1898 -VADDSSrm_Int 1899 -VADDSSrr 1900 -VADDSSrr_Int 1901 -VADDSUBPDYrm 1902 -VADDSUBPDYrr 1903 -VADDSUBPDrm 1904 -VADDSUBPDrr 1905 -VADDSUBPSYrm 1906 -VADDSUBPSYrr 1907 -VADDSUBPSrm 1908 
-VADDSUBPSrr 1909 -VAESDECLASTYrm 1910 -VAESDECLASTYrr 1911 -VAESDECLASTZ 1912 -VAESDECLASTZrm 1913 -VAESDECLASTZrr 1914 -VAESDECLASTrm 1915 -VAESDECLASTrr 1916 -VAESDECYrm 1917 -VAESDECYrr 1918 -VAESDECZ 1919 -VAESDECZrm 1920 -VAESDECZrr 1921 -VAESDECrm 1922 -VAESDECrr 1923 -VAESENCLASTYrm 1924 -VAESENCLASTYrr 1925 -VAESENCLASTZ 1926 -VAESENCLASTZrm 1927 -VAESENCLASTZrr 1928 -VAESENCLASTrm 1929 -VAESENCLASTrr 1930 -VAESENCYrm 1931 -VAESENCYrr 1932 -VAESENCZ 1933 -VAESENCZrm 1934 -VAESENCZrr 1935 -VAESENCrm 1936 -VAESENCrr 1937 -VAESIMCrm 1938 -VAESIMCrr 1939 -VAESKEYGENASSISTrmi 1940 -VAESKEYGENASSISTrri 1941 -VALIGNDZ 1942 -VALIGNDZrmbi 1943 -VALIGNDZrmbik 1944 -VALIGNDZrmbikz 1945 -VALIGNDZrmi 1946 -VALIGNDZrmik 1947 -VALIGNDZrmikz 1948 -VALIGNDZrri 1949 -VALIGNDZrrik 1950 -VALIGNDZrrikz 1951 -VALIGNQZ 1952 -VALIGNQZrmbi 1953 -VALIGNQZrmbik 1954 -VALIGNQZrmbikz 1955 -VALIGNQZrmi 1956 -VALIGNQZrmik 1957 -VALIGNQZrmikz 1958 -VALIGNQZrri 1959 -VALIGNQZrrik 1960 -VALIGNQZrrikz 1961 -VANDNPDYrm 1962 -VANDNPDYrr 1963 -VANDNPDZ 1964 -VANDNPDZrm 1965 -VANDNPDZrmb 1966 -VANDNPDZrmbk 1967 -VANDNPDZrmbkz 1968 -VANDNPDZrmk 1969 -VANDNPDZrmkz 1970 -VANDNPDZrr 1971 -VANDNPDZrrk 1972 -VANDNPDZrrkz 1973 -VANDNPDrm 1974 -VANDNPDrr 1975 -VANDNPSYrm 1976 -VANDNPSYrr 1977 -VANDNPSZ 1978 -VANDNPSZrm 1979 -VANDNPSZrmb 1980 -VANDNPSZrmbk 1981 -VANDNPSZrmbkz 1982 -VANDNPSZrmk 1983 -VANDNPSZrmkz 1984 -VANDNPSZrr 1985 -VANDNPSZrrk 1986 -VANDNPSZrrkz 1987 -VANDNPSrm 1988 -VANDNPSrr 1989 -VANDPDYrm 1990 -VANDPDYrr 1991 -VANDPDZ 1992 -VANDPDZrm 1993 -VANDPDZrmb 1994 -VANDPDZrmbk 1995 -VANDPDZrmbkz 1996 -VANDPDZrmk 1997 -VANDPDZrmkz 1998 -VANDPDZrr 1999 -VANDPDZrrk 2000 -VANDPDZrrkz 2001 -VANDPDrm 2002 -VANDPDrr 2003 -VANDPSYrm 2004 -VANDPSYrr 2005 -VANDPSZ 2006 -VANDPSZrm 2007 -VANDPSZrmb 2008 -VANDPSZrmbk 2009 -VANDPSZrmbkz 2010 -VANDPSZrmk 2011 -VANDPSZrmkz 2012 -VANDPSZrr 2013 -VANDPSZrrk 2014 -VANDPSZrrkz 2015 -VANDPSrm 2016 -VANDPSrr 2017 -VASTART_SAVE_XMM_REGS 2018 -VBCSTNEBF 2019 -VBCSTNESH 2020 -VBLENDMPDZ 2021 -VBLENDMPDZrm 2022 -VBLENDMPDZrmb 2023 -VBLENDMPDZrmbk 2024 -VBLENDMPDZrmbkz 2025 -VBLENDMPDZrmk 2026 -VBLENDMPDZrmkz 2027 -VBLENDMPDZrr 2028 -VBLENDMPDZrrk 2029 -VBLENDMPDZrrkz 2030 -VBLENDMPSZ 2031 -VBLENDMPSZrm 2032 -VBLENDMPSZrmb 2033 -VBLENDMPSZrmbk 2034 -VBLENDMPSZrmbkz 2035 -VBLENDMPSZrmk 2036 -VBLENDMPSZrmkz 2037 -VBLENDMPSZrr 2038 -VBLENDMPSZrrk 2039 -VBLENDMPSZrrkz 2040 -VBLENDPDYrmi 2041 -VBLENDPDYrri 2042 -VBLENDPDrmi 2043 -VBLENDPDrri 2044 -VBLENDPSYrmi 2045 -VBLENDPSYrri 2046 -VBLENDPSrmi 2047 -VBLENDPSrri 2048 -VBLENDVPDYrmr 2049 -VBLENDVPDYrrr 2050 -VBLENDVPDrmr 2051 -VBLENDVPDrrr 2052 -VBLENDVPSYrmr 2053 -VBLENDVPSYrrr 2054 -VBLENDVPSrmr 2055 -VBLENDVPSrrr 2056 -VBROADCASTF 2057 -VBROADCASTI 2058 -VBROADCASTSDYrm 2059 -VBROADCASTSDYrr 2060 -VBROADCASTSDZ 2061 -VBROADCASTSDZrm 2062 -VBROADCASTSDZrmk 2063 -VBROADCASTSDZrmkz 2064 -VBROADCASTSDZrr 2065 -VBROADCASTSDZrrk 2066 -VBROADCASTSDZrrkz 2067 -VBROADCASTSSYrm 2068 -VBROADCASTSSYrr 2069 -VBROADCASTSSZ 2070 -VBROADCASTSSZrm 2071 -VBROADCASTSSZrmk 2072 -VBROADCASTSSZrmkz 2073 -VBROADCASTSSZrr 2074 -VBROADCASTSSZrrk 2075 -VBROADCASTSSZrrkz 2076 -VBROADCASTSSrm 2077 -VBROADCASTSSrr 2078 -VCMPBF 2079 -VCMPPDYrmi 2080 -VCMPPDYrri 2081 -VCMPPDZ 2082 -VCMPPDZrmbi 2083 -VCMPPDZrmbik 2084 -VCMPPDZrmi 2085 -VCMPPDZrmik 2086 -VCMPPDZrri 2087 -VCMPPDZrrib 2088 -VCMPPDZrribk 2089 -VCMPPDZrrik 2090 -VCMPPDrmi 2091 -VCMPPDrri 2092 -VCMPPHZ 2093 -VCMPPHZrmbi 2094 -VCMPPHZrmbik 2095 -VCMPPHZrmi 2096 -VCMPPHZrmik 2097 -VCMPPHZrri 2098 -VCMPPHZrrib 
2099 -VCMPPHZrribk 2100 -VCMPPHZrrik 2101 -VCMPPSYrmi 2102 -VCMPPSYrri 2103 -VCMPPSZ 2104 -VCMPPSZrmbi 2105 -VCMPPSZrmbik 2106 -VCMPPSZrmi 2107 -VCMPPSZrmik 2108 -VCMPPSZrri 2109 -VCMPPSZrrib 2110 -VCMPPSZrribk 2111 -VCMPPSZrrik 2112 -VCMPPSrmi 2113 -VCMPPSrri 2114 -VCMPSDZrmi 2115 -VCMPSDZrmi_Int 2116 -VCMPSDZrmik_Int 2117 -VCMPSDZrri 2118 -VCMPSDZrri_Int 2119 -VCMPSDZrrib_Int 2120 -VCMPSDZrribk_Int 2121 -VCMPSDZrrik_Int 2122 -VCMPSDrmi 2123 -VCMPSDrmi_Int 2124 -VCMPSDrri 2125 -VCMPSDrri_Int 2126 -VCMPSHZrmi 2127 -VCMPSHZrmi_Int 2128 -VCMPSHZrmik_Int 2129 -VCMPSHZrri 2130 -VCMPSHZrri_Int 2131 -VCMPSHZrrib_Int 2132 -VCMPSHZrribk_Int 2133 -VCMPSHZrrik_Int 2134 -VCMPSSZrmi 2135 -VCMPSSZrmi_Int 2136 -VCMPSSZrmik_Int 2137 -VCMPSSZrri 2138 -VCMPSSZrri_Int 2139 -VCMPSSZrrib_Int 2140 -VCMPSSZrribk_Int 2141 -VCMPSSZrrik_Int 2142 -VCMPSSrmi 2143 -VCMPSSrmi_Int 2144 -VCMPSSrri 2145 -VCMPSSrri_Int 2146 -VCOMISBF 2147 -VCOMISDZrm 2148 -VCOMISDZrm_Int 2149 -VCOMISDZrr 2150 -VCOMISDZrr_Int 2151 -VCOMISDZrrb 2152 -VCOMISDrm 2153 -VCOMISDrm_Int 2154 -VCOMISDrr 2155 -VCOMISDrr_Int 2156 -VCOMISHZrm 2157 -VCOMISHZrm_Int 2158 -VCOMISHZrr 2159 -VCOMISHZrr_Int 2160 -VCOMISHZrrb 2161 -VCOMISSZrm 2162 -VCOMISSZrm_Int 2163 -VCOMISSZrr 2164 -VCOMISSZrr_Int 2165 -VCOMISSZrrb 2166 -VCOMISSrm 2167 -VCOMISSrm_Int 2168 -VCOMISSrr 2169 -VCOMISSrr_Int 2170 -VCOMPRESSPDZ 2171 -VCOMPRESSPDZmr 2172 -VCOMPRESSPDZmrk 2173 -VCOMPRESSPDZrr 2174 -VCOMPRESSPDZrrk 2175 -VCOMPRESSPDZrrkz 2176 -VCOMPRESSPSZ 2177 -VCOMPRESSPSZmr 2178 -VCOMPRESSPSZmrk 2179 -VCOMPRESSPSZrr 2180 -VCOMPRESSPSZrrk 2181 -VCOMPRESSPSZrrkz 2182 -VCOMXSDZrm_Int 2183 -VCOMXSDZrr_Int 2184 -VCOMXSDZrrb_Int 2185 -VCOMXSHZrm_Int 2186 -VCOMXSHZrr_Int 2187 -VCOMXSHZrrb_Int 2188 -VCOMXSSZrm_Int 2189 -VCOMXSSZrr_Int 2190 -VCOMXSSZrrb_Int 2191 -VCVT 2192 -VCVTBF 2193 -VCVTBIASPH 2194 -VCVTDQ 2195 -VCVTHF 2196 -VCVTNE 2197 -VCVTNEEBF 2198 -VCVTNEEPH 2199 -VCVTNEOBF 2200 -VCVTNEOPH 2201 -VCVTNEPS 2202 -VCVTPD 2203 -VCVTPH 2204 -VCVTPS 2205 -VCVTQQ 2206 -VCVTSD 2207 -VCVTSH 2208 -VCVTSI 2209 -VCVTSS 2210 -VCVTTBF 2211 -VCVTTPD 2212 -VCVTTPH 2213 -VCVTTPS 2214 -VCVTTSD 2215 -VCVTTSH 2216 -VCVTTSS 2217 -VCVTUDQ 2218 -VCVTUQQ 2219 -VCVTUSI 2220 -VCVTUW 2221 -VCVTW 2222 -VDBPSADBWZ 2223 -VDBPSADBWZrmi 2224 -VDBPSADBWZrmik 2225 -VDBPSADBWZrmikz 2226 -VDBPSADBWZrri 2227 -VDBPSADBWZrrik 2228 -VDBPSADBWZrrikz 2229 -VDIVBF 2230 -VDIVPDYrm 2231 -VDIVPDYrr 2232 -VDIVPDZ 2233 -VDIVPDZrm 2234 -VDIVPDZrmb 2235 -VDIVPDZrmbk 2236 -VDIVPDZrmbkz 2237 -VDIVPDZrmk 2238 -VDIVPDZrmkz 2239 -VDIVPDZrr 2240 -VDIVPDZrrb 2241 -VDIVPDZrrbk 2242 -VDIVPDZrrbkz 2243 -VDIVPDZrrk 2244 -VDIVPDZrrkz 2245 -VDIVPDrm 2246 -VDIVPDrr 2247 -VDIVPHZ 2248 -VDIVPHZrm 2249 -VDIVPHZrmb 2250 -VDIVPHZrmbk 2251 -VDIVPHZrmbkz 2252 -VDIVPHZrmk 2253 -VDIVPHZrmkz 2254 -VDIVPHZrr 2255 -VDIVPHZrrb 2256 -VDIVPHZrrbk 2257 -VDIVPHZrrbkz 2258 -VDIVPHZrrk 2259 -VDIVPHZrrkz 2260 -VDIVPSYrm 2261 -VDIVPSYrr 2262 -VDIVPSZ 2263 -VDIVPSZrm 2264 -VDIVPSZrmb 2265 -VDIVPSZrmbk 2266 -VDIVPSZrmbkz 2267 -VDIVPSZrmk 2268 -VDIVPSZrmkz 2269 -VDIVPSZrr 2270 -VDIVPSZrrb 2271 -VDIVPSZrrbk 2272 -VDIVPSZrrbkz 2273 -VDIVPSZrrk 2274 -VDIVPSZrrkz 2275 -VDIVPSrm 2276 -VDIVPSrr 2277 -VDIVSDZrm 2278 -VDIVSDZrm_Int 2279 -VDIVSDZrmk_Int 2280 -VDIVSDZrmkz_Int 2281 -VDIVSDZrr 2282 -VDIVSDZrr_Int 2283 -VDIVSDZrrb_Int 2284 -VDIVSDZrrbk_Int 2285 -VDIVSDZrrbkz_Int 2286 -VDIVSDZrrk_Int 2287 -VDIVSDZrrkz_Int 2288 -VDIVSDrm 2289 -VDIVSDrm_Int 2290 -VDIVSDrr 2291 -VDIVSDrr_Int 2292 -VDIVSHZrm 2293 -VDIVSHZrm_Int 2294 -VDIVSHZrmk_Int 2295 -VDIVSHZrmkz_Int 2296 
-VDIVSHZrr 2297 -VDIVSHZrr_Int 2298 -VDIVSHZrrb_Int 2299 -VDIVSHZrrbk_Int 2300 -VDIVSHZrrbkz_Int 2301 -VDIVSHZrrk_Int 2302 -VDIVSHZrrkz_Int 2303 -VDIVSSZrm 2304 -VDIVSSZrm_Int 2305 -VDIVSSZrmk_Int 2306 -VDIVSSZrmkz_Int 2307 -VDIVSSZrr 2308 -VDIVSSZrr_Int 2309 -VDIVSSZrrb_Int 2310 -VDIVSSZrrbk_Int 2311 -VDIVSSZrrbkz_Int 2312 -VDIVSSZrrk_Int 2313 -VDIVSSZrrkz_Int 2314 -VDIVSSrm 2315 -VDIVSSrm_Int 2316 -VDIVSSrr 2317 -VDIVSSrr_Int 2318 -VDPBF 2319 -VDPPDrmi 2320 -VDPPDrri 2321 -VDPPHPSZ 2322 -VDPPHPSZm 2323 -VDPPHPSZmb 2324 -VDPPHPSZmbk 2325 -VDPPHPSZmbkz 2326 -VDPPHPSZmk 2327 -VDPPHPSZmkz 2328 -VDPPHPSZr 2329 -VDPPHPSZrk 2330 -VDPPHPSZrkz 2331 -VDPPSYrmi 2332 -VDPPSYrri 2333 -VDPPSrmi 2334 -VDPPSrri 2335 -VERRm 2336 -VERRr 2337 -VERWm 2338 -VERWr 2339 -VEXP 2340 -VEXPANDPDZ 2341 -VEXPANDPDZrm 2342 -VEXPANDPDZrmk 2343 -VEXPANDPDZrmkz 2344 -VEXPANDPDZrr 2345 -VEXPANDPDZrrk 2346 -VEXPANDPDZrrkz 2347 -VEXPANDPSZ 2348 -VEXPANDPSZrm 2349 -VEXPANDPSZrmk 2350 -VEXPANDPSZrmkz 2351 -VEXPANDPSZrr 2352 -VEXPANDPSZrrk 2353 -VEXPANDPSZrrkz 2354 -VEXTRACTF 2355 -VEXTRACTI 2356 -VEXTRACTPSZmri 2357 -VEXTRACTPSZrri 2358 -VEXTRACTPSmri 2359 -VEXTRACTPSrri 2360 -VFCMADDCPHZ 2361 -VFCMADDCPHZm 2362 -VFCMADDCPHZmb 2363 -VFCMADDCPHZmbk 2364 -VFCMADDCPHZmbkz 2365 -VFCMADDCPHZmk 2366 -VFCMADDCPHZmkz 2367 -VFCMADDCPHZr 2368 -VFCMADDCPHZrb 2369 -VFCMADDCPHZrbk 2370 -VFCMADDCPHZrbkz 2371 -VFCMADDCPHZrk 2372 -VFCMADDCPHZrkz 2373 -VFCMADDCSHZm 2374 -VFCMADDCSHZmk 2375 -VFCMADDCSHZmkz 2376 -VFCMADDCSHZr 2377 -VFCMADDCSHZrb 2378 -VFCMADDCSHZrbk 2379 -VFCMADDCSHZrbkz 2380 -VFCMADDCSHZrk 2381 -VFCMADDCSHZrkz 2382 -VFCMULCPHZ 2383 -VFCMULCPHZrm 2384 -VFCMULCPHZrmb 2385 -VFCMULCPHZrmbk 2386 -VFCMULCPHZrmbkz 2387 -VFCMULCPHZrmk 2388 -VFCMULCPHZrmkz 2389 -VFCMULCPHZrr 2390 -VFCMULCPHZrrb 2391 -VFCMULCPHZrrbk 2392 -VFCMULCPHZrrbkz 2393 -VFCMULCPHZrrk 2394 -VFCMULCPHZrrkz 2395 -VFCMULCSHZrm 2396 -VFCMULCSHZrmk 2397 -VFCMULCSHZrmkz 2398 -VFCMULCSHZrr 2399 -VFCMULCSHZrrb 2400 -VFCMULCSHZrrbk 2401 -VFCMULCSHZrrbkz 2402 -VFCMULCSHZrrk 2403 -VFCMULCSHZrrkz 2404 -VFIXUPIMMPDZ 2405 -VFIXUPIMMPDZrmbi 2406 -VFIXUPIMMPDZrmbik 2407 -VFIXUPIMMPDZrmbikz 2408 -VFIXUPIMMPDZrmi 2409 -VFIXUPIMMPDZrmik 2410 -VFIXUPIMMPDZrmikz 2411 -VFIXUPIMMPDZrri 2412 -VFIXUPIMMPDZrrib 2413 -VFIXUPIMMPDZrribk 2414 -VFIXUPIMMPDZrribkz 2415 -VFIXUPIMMPDZrrik 2416 -VFIXUPIMMPDZrrikz 2417 -VFIXUPIMMPSZ 2418 -VFIXUPIMMPSZrmbi 2419 -VFIXUPIMMPSZrmbik 2420 -VFIXUPIMMPSZrmbikz 2421 -VFIXUPIMMPSZrmi 2422 -VFIXUPIMMPSZrmik 2423 -VFIXUPIMMPSZrmikz 2424 -VFIXUPIMMPSZrri 2425 -VFIXUPIMMPSZrrib 2426 -VFIXUPIMMPSZrribk 2427 -VFIXUPIMMPSZrribkz 2428 -VFIXUPIMMPSZrrik 2429 -VFIXUPIMMPSZrrikz 2430 -VFIXUPIMMSDZrmi 2431 -VFIXUPIMMSDZrmik 2432 -VFIXUPIMMSDZrmikz 2433 -VFIXUPIMMSDZrri 2434 -VFIXUPIMMSDZrrib 2435 -VFIXUPIMMSDZrribk 2436 -VFIXUPIMMSDZrribkz 2437 -VFIXUPIMMSDZrrik 2438 -VFIXUPIMMSDZrrikz 2439 -VFIXUPIMMSSZrmi 2440 -VFIXUPIMMSSZrmik 2441 -VFIXUPIMMSSZrmikz 2442 -VFIXUPIMMSSZrri 2443 -VFIXUPIMMSSZrrib 2444 -VFIXUPIMMSSZrribk 2445 -VFIXUPIMMSSZrribkz 2446 -VFIXUPIMMSSZrrik 2447 -VFIXUPIMMSSZrrikz 2448 -VFMADD 2449 -VFMADDCPHZ 2450 -VFMADDCPHZm 2451 -VFMADDCPHZmb 2452 -VFMADDCPHZmbk 2453 -VFMADDCPHZmbkz 2454 -VFMADDCPHZmk 2455 -VFMADDCPHZmkz 2456 -VFMADDCPHZr 2457 -VFMADDCPHZrb 2458 -VFMADDCPHZrbk 2459 -VFMADDCPHZrbkz 2460 -VFMADDCPHZrk 2461 -VFMADDCPHZrkz 2462 -VFMADDCSHZm 2463 -VFMADDCSHZmk 2464 -VFMADDCSHZmkz 2465 -VFMADDCSHZr 2466 -VFMADDCSHZrb 2467 -VFMADDCSHZrbk 2468 -VFMADDCSHZrbkz 2469 -VFMADDCSHZrk 2470 -VFMADDCSHZrkz 2471 -VFMADDPD 2472 -VFMADDPS 2473 -VFMADDSD 
2474 -VFMADDSS 2475 -VFMADDSUB 2476 -VFMADDSUBPD 2477 -VFMADDSUBPS 2478 -VFMSUB 2479 -VFMSUBADD 2480 -VFMSUBADDPD 2481 -VFMSUBADDPS 2482 -VFMSUBPD 2483 -VFMSUBPS 2484 -VFMSUBSD 2485 -VFMSUBSS 2486 -VFMULCPHZ 2487 -VFMULCPHZrm 2488 -VFMULCPHZrmb 2489 -VFMULCPHZrmbk 2490 -VFMULCPHZrmbkz 2491 -VFMULCPHZrmk 2492 -VFMULCPHZrmkz 2493 -VFMULCPHZrr 2494 -VFMULCPHZrrb 2495 -VFMULCPHZrrbk 2496 -VFMULCPHZrrbkz 2497 -VFMULCPHZrrk 2498 -VFMULCPHZrrkz 2499 -VFMULCSHZrm 2500 -VFMULCSHZrmk 2501 -VFMULCSHZrmkz 2502 -VFMULCSHZrr 2503 -VFMULCSHZrrb 2504 -VFMULCSHZrrbk 2505 -VFMULCSHZrrbkz 2506 -VFMULCSHZrrk 2507 -VFMULCSHZrrkz 2508 -VFNMADD 2509 -VFNMADDPD 2510 -VFNMADDPS 2511 -VFNMADDSD 2512 -VFNMADDSS 2513 -VFNMSUB 2514 -VFNMSUBPD 2515 -VFNMSUBPS 2516 -VFNMSUBSD 2517 -VFNMSUBSS 2518 -VFPCLASSBF 2519 -VFPCLASSPDZ 2520 -VFPCLASSPDZmbi 2521 -VFPCLASSPDZmbik 2522 -VFPCLASSPDZmi 2523 -VFPCLASSPDZmik 2524 -VFPCLASSPDZri 2525 -VFPCLASSPDZrik 2526 -VFPCLASSPHZ 2527 -VFPCLASSPHZmbi 2528 -VFPCLASSPHZmbik 2529 -VFPCLASSPHZmi 2530 -VFPCLASSPHZmik 2531 -VFPCLASSPHZri 2532 -VFPCLASSPHZrik 2533 -VFPCLASSPSZ 2534 -VFPCLASSPSZmbi 2535 -VFPCLASSPSZmbik 2536 -VFPCLASSPSZmi 2537 -VFPCLASSPSZmik 2538 -VFPCLASSPSZri 2539 -VFPCLASSPSZrik 2540 -VFPCLASSSDZmi 2541 -VFPCLASSSDZmik 2542 -VFPCLASSSDZri 2543 -VFPCLASSSDZrik 2544 -VFPCLASSSHZmi 2545 -VFPCLASSSHZmik 2546 -VFPCLASSSHZri 2547 -VFPCLASSSHZrik 2548 -VFPCLASSSSZmi 2549 -VFPCLASSSSZmik 2550 -VFPCLASSSSZri 2551 -VFPCLASSSSZrik 2552 -VFRCZPDYrm 2553 -VFRCZPDYrr 2554 -VFRCZPDrm 2555 -VFRCZPDrr 2556 -VFRCZPSYrm 2557 -VFRCZPSYrr 2558 -VFRCZPSrm 2559 -VFRCZPSrr 2560 -VFRCZSDrm 2561 -VFRCZSDrr 2562 -VFRCZSSrm 2563 -VFRCZSSrr 2564 -VGATHERDPDYrm 2565 -VGATHERDPDZ 2566 -VGATHERDPDZrm 2567 -VGATHERDPDrm 2568 -VGATHERDPSYrm 2569 -VGATHERDPSZ 2570 -VGATHERDPSZrm 2571 -VGATHERDPSrm 2572 -VGATHERPF 2573 -VGATHERQPDYrm 2574 -VGATHERQPDZ 2575 -VGATHERQPDZrm 2576 -VGATHERQPDrm 2577 -VGATHERQPSYrm 2578 -VGATHERQPSZ 2579 -VGATHERQPSZrm 2580 -VGATHERQPSrm 2581 -VGETEXPBF 2582 -VGETEXPPDZ 2583 -VGETEXPPDZm 2584 -VGETEXPPDZmb 2585 -VGETEXPPDZmbk 2586 -VGETEXPPDZmbkz 2587 -VGETEXPPDZmk 2588 -VGETEXPPDZmkz 2589 -VGETEXPPDZr 2590 -VGETEXPPDZrb 2591 -VGETEXPPDZrbk 2592 -VGETEXPPDZrbkz 2593 -VGETEXPPDZrk 2594 -VGETEXPPDZrkz 2595 -VGETEXPPHZ 2596 -VGETEXPPHZm 2597 -VGETEXPPHZmb 2598 -VGETEXPPHZmbk 2599 -VGETEXPPHZmbkz 2600 -VGETEXPPHZmk 2601 -VGETEXPPHZmkz 2602 -VGETEXPPHZr 2603 -VGETEXPPHZrb 2604 -VGETEXPPHZrbk 2605 -VGETEXPPHZrbkz 2606 -VGETEXPPHZrk 2607 -VGETEXPPHZrkz 2608 -VGETEXPPSZ 2609 -VGETEXPPSZm 2610 -VGETEXPPSZmb 2611 -VGETEXPPSZmbk 2612 -VGETEXPPSZmbkz 2613 -VGETEXPPSZmk 2614 -VGETEXPPSZmkz 2615 -VGETEXPPSZr 2616 -VGETEXPPSZrb 2617 -VGETEXPPSZrbk 2618 -VGETEXPPSZrbkz 2619 -VGETEXPPSZrk 2620 -VGETEXPPSZrkz 2621 -VGETEXPSDZm 2622 -VGETEXPSDZmk 2623 -VGETEXPSDZmkz 2624 -VGETEXPSDZr 2625 -VGETEXPSDZrb 2626 -VGETEXPSDZrbk 2627 -VGETEXPSDZrbkz 2628 -VGETEXPSDZrk 2629 -VGETEXPSDZrkz 2630 -VGETEXPSHZm 2631 -VGETEXPSHZmk 2632 -VGETEXPSHZmkz 2633 -VGETEXPSHZr 2634 -VGETEXPSHZrb 2635 -VGETEXPSHZrbk 2636 -VGETEXPSHZrbkz 2637 -VGETEXPSHZrk 2638 -VGETEXPSHZrkz 2639 -VGETEXPSSZm 2640 -VGETEXPSSZmk 2641 -VGETEXPSSZmkz 2642 -VGETEXPSSZr 2643 -VGETEXPSSZrb 2644 -VGETEXPSSZrbk 2645 -VGETEXPSSZrbkz 2646 -VGETEXPSSZrk 2647 -VGETEXPSSZrkz 2648 -VGETMANTBF 2649 -VGETMANTPDZ 2650 -VGETMANTPDZrmbi 2651 -VGETMANTPDZrmbik 2652 -VGETMANTPDZrmbikz 2653 -VGETMANTPDZrmi 2654 -VGETMANTPDZrmik 2655 -VGETMANTPDZrmikz 2656 -VGETMANTPDZrri 2657 -VGETMANTPDZrrib 2658 -VGETMANTPDZrribk 2659 -VGETMANTPDZrribkz 2660 
-VGETMANTPDZrrik 2661 -VGETMANTPDZrrikz 2662 -VGETMANTPHZ 2663 -VGETMANTPHZrmbi 2664 -VGETMANTPHZrmbik 2665 -VGETMANTPHZrmbikz 2666 -VGETMANTPHZrmi 2667 -VGETMANTPHZrmik 2668 -VGETMANTPHZrmikz 2669 -VGETMANTPHZrri 2670 -VGETMANTPHZrrib 2671 -VGETMANTPHZrribk 2672 -VGETMANTPHZrribkz 2673 -VGETMANTPHZrrik 2674 -VGETMANTPHZrrikz 2675 -VGETMANTPSZ 2676 -VGETMANTPSZrmbi 2677 -VGETMANTPSZrmbik 2678 -VGETMANTPSZrmbikz 2679 -VGETMANTPSZrmi 2680 -VGETMANTPSZrmik 2681 -VGETMANTPSZrmikz 2682 -VGETMANTPSZrri 2683 -VGETMANTPSZrrib 2684 -VGETMANTPSZrribk 2685 -VGETMANTPSZrribkz 2686 -VGETMANTPSZrrik 2687 -VGETMANTPSZrrikz 2688 -VGETMANTSDZrmi 2689 -VGETMANTSDZrmik 2690 -VGETMANTSDZrmikz 2691 -VGETMANTSDZrri 2692 -VGETMANTSDZrrib 2693 -VGETMANTSDZrribk 2694 -VGETMANTSDZrribkz 2695 -VGETMANTSDZrrik 2696 -VGETMANTSDZrrikz 2697 -VGETMANTSHZrmi 2698 -VGETMANTSHZrmik 2699 -VGETMANTSHZrmikz 2700 -VGETMANTSHZrri 2701 -VGETMANTSHZrrib 2702 -VGETMANTSHZrribk 2703 -VGETMANTSHZrribkz 2704 -VGETMANTSHZrrik 2705 -VGETMANTSHZrrikz 2706 -VGETMANTSSZrmi 2707 -VGETMANTSSZrmik 2708 -VGETMANTSSZrmikz 2709 -VGETMANTSSZrri 2710 -VGETMANTSSZrrib 2711 -VGETMANTSSZrribk 2712 -VGETMANTSSZrribkz 2713 -VGETMANTSSZrrik 2714 -VGETMANTSSZrrikz 2715 -VGF 2716 -VHADDPDYrm 2717 -VHADDPDYrr 2718 -VHADDPDrm 2719 -VHADDPDrr 2720 -VHADDPSYrm 2721 -VHADDPSYrr 2722 -VHADDPSrm 2723 -VHADDPSrr 2724 -VHSUBPDYrm 2725 -VHSUBPDYrr 2726 -VHSUBPDrm 2727 -VHSUBPDrr 2728 -VHSUBPSYrm 2729 -VHSUBPSYrr 2730 -VHSUBPSrm 2731 -VHSUBPSrr 2732 -VINSERTF 2733 -VINSERTI 2734 -VINSERTPSZrmi 2735 -VINSERTPSZrri 2736 -VINSERTPSrmi 2737 -VINSERTPSrri 2738 -VLDDQUYrm 2739 -VLDDQUrm 2740 -VLDMXCSR 2741 -VMASKMOVDQU 2742 -VMASKMOVPDYmr 2743 -VMASKMOVPDYrm 2744 -VMASKMOVPDmr 2745 -VMASKMOVPDrm 2746 -VMASKMOVPSYmr 2747 -VMASKMOVPSYrm 2748 -VMASKMOVPSmr 2749 -VMASKMOVPSrm 2750 -VMAXBF 2751 -VMAXCPDYrm 2752 -VMAXCPDYrr 2753 -VMAXCPDZ 2754 -VMAXCPDZrm 2755 -VMAXCPDZrmb 2756 -VMAXCPDZrmbk 2757 -VMAXCPDZrmbkz 2758 -VMAXCPDZrmk 2759 -VMAXCPDZrmkz 2760 -VMAXCPDZrr 2761 -VMAXCPDZrrk 2762 -VMAXCPDZrrkz 2763 -VMAXCPDrm 2764 -VMAXCPDrr 2765 -VMAXCPHZ 2766 -VMAXCPHZrm 2767 -VMAXCPHZrmb 2768 -VMAXCPHZrmbk 2769 -VMAXCPHZrmbkz 2770 -VMAXCPHZrmk 2771 -VMAXCPHZrmkz 2772 -VMAXCPHZrr 2773 -VMAXCPHZrrk 2774 -VMAXCPHZrrkz 2775 -VMAXCPSYrm 2776 -VMAXCPSYrr 2777 -VMAXCPSZ 2778 -VMAXCPSZrm 2779 -VMAXCPSZrmb 2780 -VMAXCPSZrmbk 2781 -VMAXCPSZrmbkz 2782 -VMAXCPSZrmk 2783 -VMAXCPSZrmkz 2784 -VMAXCPSZrr 2785 -VMAXCPSZrrk 2786 -VMAXCPSZrrkz 2787 -VMAXCPSrm 2788 -VMAXCPSrr 2789 -VMAXCSDZrm 2790 -VMAXCSDZrr 2791 -VMAXCSDrm 2792 -VMAXCSDrr 2793 -VMAXCSHZrm 2794 -VMAXCSHZrr 2795 -VMAXCSSZrm 2796 -VMAXCSSZrr 2797 -VMAXCSSrm 2798 -VMAXCSSrr 2799 -VMAXPDYrm 2800 -VMAXPDYrr 2801 -VMAXPDZ 2802 -VMAXPDZrm 2803 -VMAXPDZrmb 2804 -VMAXPDZrmbk 2805 -VMAXPDZrmbkz 2806 -VMAXPDZrmk 2807 -VMAXPDZrmkz 2808 -VMAXPDZrr 2809 -VMAXPDZrrb 2810 -VMAXPDZrrbk 2811 -VMAXPDZrrbkz 2812 -VMAXPDZrrk 2813 -VMAXPDZrrkz 2814 -VMAXPDrm 2815 -VMAXPDrr 2816 -VMAXPHZ 2817 -VMAXPHZrm 2818 -VMAXPHZrmb 2819 -VMAXPHZrmbk 2820 -VMAXPHZrmbkz 2821 -VMAXPHZrmk 2822 -VMAXPHZrmkz 2823 -VMAXPHZrr 2824 -VMAXPHZrrb 2825 -VMAXPHZrrbk 2826 -VMAXPHZrrbkz 2827 -VMAXPHZrrk 2828 -VMAXPHZrrkz 2829 -VMAXPSYrm 2830 -VMAXPSYrr 2831 -VMAXPSZ 2832 -VMAXPSZrm 2833 -VMAXPSZrmb 2834 -VMAXPSZrmbk 2835 -VMAXPSZrmbkz 2836 -VMAXPSZrmk 2837 -VMAXPSZrmkz 2838 -VMAXPSZrr 2839 -VMAXPSZrrb 2840 -VMAXPSZrrbk 2841 -VMAXPSZrrbkz 2842 -VMAXPSZrrk 2843 -VMAXPSZrrkz 2844 -VMAXPSrm 2845 -VMAXPSrr 2846 -VMAXSDZrm 2847 -VMAXSDZrm_Int 2848 -VMAXSDZrmk_Int 2849 -VMAXSDZrmkz_Int 2850 
-VMAXSDZrr 2851 -VMAXSDZrr_Int 2852 -VMAXSDZrrb_Int 2853 -VMAXSDZrrbk_Int 2854 -VMAXSDZrrbkz_Int 2855 -VMAXSDZrrk_Int 2856 -VMAXSDZrrkz_Int 2857 -VMAXSDrm 2858 -VMAXSDrm_Int 2859 -VMAXSDrr 2860 -VMAXSDrr_Int 2861 -VMAXSHZrm 2862 -VMAXSHZrm_Int 2863 -VMAXSHZrmk_Int 2864 -VMAXSHZrmkz_Int 2865 -VMAXSHZrr 2866 -VMAXSHZrr_Int 2867 -VMAXSHZrrb_Int 2868 -VMAXSHZrrbk_Int 2869 -VMAXSHZrrbkz_Int 2870 -VMAXSHZrrk_Int 2871 -VMAXSHZrrkz_Int 2872 -VMAXSSZrm 2873 -VMAXSSZrm_Int 2874 -VMAXSSZrmk_Int 2875 -VMAXSSZrmkz_Int 2876 -VMAXSSZrr 2877 -VMAXSSZrr_Int 2878 -VMAXSSZrrb_Int 2879 -VMAXSSZrrbk_Int 2880 -VMAXSSZrrbkz_Int 2881 -VMAXSSZrrk_Int 2882 -VMAXSSZrrkz_Int 2883 -VMAXSSrm 2884 -VMAXSSrm_Int 2885 -VMAXSSrr 2886 -VMAXSSrr_Int 2887 -VMCALL 2888 -VMCLEARm 2889 -VMFUNC 2890 -VMINBF 2891 -VMINCPDYrm 2892 -VMINCPDYrr 2893 -VMINCPDZ 2894 -VMINCPDZrm 2895 -VMINCPDZrmb 2896 -VMINCPDZrmbk 2897 -VMINCPDZrmbkz 2898 -VMINCPDZrmk 2899 -VMINCPDZrmkz 2900 -VMINCPDZrr 2901 -VMINCPDZrrk 2902 -VMINCPDZrrkz 2903 -VMINCPDrm 2904 -VMINCPDrr 2905 -VMINCPHZ 2906 -VMINCPHZrm 2907 -VMINCPHZrmb 2908 -VMINCPHZrmbk 2909 -VMINCPHZrmbkz 2910 -VMINCPHZrmk 2911 -VMINCPHZrmkz 2912 -VMINCPHZrr 2913 -VMINCPHZrrk 2914 -VMINCPHZrrkz 2915 -VMINCPSYrm 2916 -VMINCPSYrr 2917 -VMINCPSZ 2918 -VMINCPSZrm 2919 -VMINCPSZrmb 2920 -VMINCPSZrmbk 2921 -VMINCPSZrmbkz 2922 -VMINCPSZrmk 2923 -VMINCPSZrmkz 2924 -VMINCPSZrr 2925 -VMINCPSZrrk 2926 -VMINCPSZrrkz 2927 -VMINCPSrm 2928 -VMINCPSrr 2929 -VMINCSDZrm 2930 -VMINCSDZrr 2931 -VMINCSDrm 2932 -VMINCSDrr 2933 -VMINCSHZrm 2934 -VMINCSHZrr 2935 -VMINCSSZrm 2936 -VMINCSSZrr 2937 -VMINCSSrm 2938 -VMINCSSrr 2939 -VMINMAXBF 2940 -VMINMAXPDZ 2941 -VMINMAXPDZrmbi 2942 -VMINMAXPDZrmbik 2943 -VMINMAXPDZrmbikz 2944 -VMINMAXPDZrmi 2945 -VMINMAXPDZrmik 2946 -VMINMAXPDZrmikz 2947 -VMINMAXPDZrri 2948 -VMINMAXPDZrrib 2949 -VMINMAXPDZrribk 2950 -VMINMAXPDZrribkz 2951 -VMINMAXPDZrrik 2952 -VMINMAXPDZrrikz 2953 -VMINMAXPHZ 2954 -VMINMAXPHZrmbi 2955 -VMINMAXPHZrmbik 2956 -VMINMAXPHZrmbikz 2957 -VMINMAXPHZrmi 2958 -VMINMAXPHZrmik 2959 -VMINMAXPHZrmikz 2960 -VMINMAXPHZrri 2961 -VMINMAXPHZrrib 2962 -VMINMAXPHZrribk 2963 -VMINMAXPHZrribkz 2964 -VMINMAXPHZrrik 2965 -VMINMAXPHZrrikz 2966 -VMINMAXPSZ 2967 -VMINMAXPSZrmbi 2968 -VMINMAXPSZrmbik 2969 -VMINMAXPSZrmbikz 2970 -VMINMAXPSZrmi 2971 -VMINMAXPSZrmik 2972 -VMINMAXPSZrmikz 2973 -VMINMAXPSZrri 2974 -VMINMAXPSZrrib 2975 -VMINMAXPSZrribk 2976 -VMINMAXPSZrribkz 2977 -VMINMAXPSZrrik 2978 -VMINMAXPSZrrikz 2979 -VMINMAXSDrmi 2980 -VMINMAXSDrmi_Int 2981 -VMINMAXSDrmik_Int 2982 -VMINMAXSDrmikz_Int 2983 -VMINMAXSDrri 2984 -VMINMAXSDrri_Int 2985 -VMINMAXSDrrib_Int 2986 -VMINMAXSDrribk_Int 2987 -VMINMAXSDrribkz_Int 2988 -VMINMAXSDrrik_Int 2989 -VMINMAXSDrrikz_Int 2990 -VMINMAXSHrmi 2991 -VMINMAXSHrmi_Int 2992 -VMINMAXSHrmik_Int 2993 -VMINMAXSHrmikz_Int 2994 -VMINMAXSHrri 2995 -VMINMAXSHrri_Int 2996 -VMINMAXSHrrib_Int 2997 -VMINMAXSHrribk_Int 2998 -VMINMAXSHrribkz_Int 2999 -VMINMAXSHrrik_Int 3000 -VMINMAXSHrrikz_Int 3001 -VMINMAXSSrmi 3002 -VMINMAXSSrmi_Int 3003 -VMINMAXSSrmik_Int 3004 -VMINMAXSSrmikz_Int 3005 -VMINMAXSSrri 3006 -VMINMAXSSrri_Int 3007 -VMINMAXSSrrib_Int 3008 -VMINMAXSSrribk_Int 3009 -VMINMAXSSrribkz_Int 3010 -VMINMAXSSrrik_Int 3011 -VMINMAXSSrrikz_Int 3012 -VMINPDYrm 3013 -VMINPDYrr 3014 -VMINPDZ 3015 -VMINPDZrm 3016 -VMINPDZrmb 3017 -VMINPDZrmbk 3018 -VMINPDZrmbkz 3019 -VMINPDZrmk 3020 -VMINPDZrmkz 3021 -VMINPDZrr 3022 -VMINPDZrrb 3023 -VMINPDZrrbk 3024 -VMINPDZrrbkz 3025 -VMINPDZrrk 3026 -VMINPDZrrkz 3027 -VMINPDrm 3028 -VMINPDrr 3029 -VMINPHZ 3030 -VMINPHZrm 3031 
-VMINPHZrmb 3032 -VMINPHZrmbk 3033 -VMINPHZrmbkz 3034 -VMINPHZrmk 3035 -VMINPHZrmkz 3036 -VMINPHZrr 3037 -VMINPHZrrb 3038 -VMINPHZrrbk 3039 -VMINPHZrrbkz 3040 -VMINPHZrrk 3041 -VMINPHZrrkz 3042 -VMINPSYrm 3043 -VMINPSYrr 3044 -VMINPSZ 3045 -VMINPSZrm 3046 -VMINPSZrmb 3047 -VMINPSZrmbk 3048 -VMINPSZrmbkz 3049 -VMINPSZrmk 3050 -VMINPSZrmkz 3051 -VMINPSZrr 3052 -VMINPSZrrb 3053 -VMINPSZrrbk 3054 -VMINPSZrrbkz 3055 -VMINPSZrrk 3056 -VMINPSZrrkz 3057 -VMINPSrm 3058 -VMINPSrr 3059 -VMINSDZrm 3060 -VMINSDZrm_Int 3061 -VMINSDZrmk_Int 3062 -VMINSDZrmkz_Int 3063 -VMINSDZrr 3064 -VMINSDZrr_Int 3065 -VMINSDZrrb_Int 3066 -VMINSDZrrbk_Int 3067 -VMINSDZrrbkz_Int 3068 -VMINSDZrrk_Int 3069 -VMINSDZrrkz_Int 3070 -VMINSDrm 3071 -VMINSDrm_Int 3072 -VMINSDrr 3073 -VMINSDrr_Int 3074 -VMINSHZrm 3075 -VMINSHZrm_Int 3076 -VMINSHZrmk_Int 3077 -VMINSHZrmkz_Int 3078 -VMINSHZrr 3079 -VMINSHZrr_Int 3080 -VMINSHZrrb_Int 3081 -VMINSHZrrbk_Int 3082 -VMINSHZrrbkz_Int 3083 -VMINSHZrrk_Int 3084 -VMINSHZrrkz_Int 3085 -VMINSSZrm 3086 -VMINSSZrm_Int 3087 -VMINSSZrmk_Int 3088 -VMINSSZrmkz_Int 3089 -VMINSSZrr 3090 -VMINSSZrr_Int 3091 -VMINSSZrrb_Int 3092 -VMINSSZrrbk_Int 3093 -VMINSSZrrbkz_Int 3094 -VMINSSZrrk_Int 3095 -VMINSSZrrkz_Int 3096 -VMINSSrm 3097 -VMINSSrm_Int 3098 -VMINSSrr 3099 -VMINSSrr_Int 3100 -VMLAUNCH 3101 -VMLOAD 3102 -VMMCALL 3103 -VMOV 3104 -VMOVAPDYmr 3105 -VMOVAPDYrm 3106 -VMOVAPDYrr 3107 -VMOVAPDYrr_REV 3108 -VMOVAPDZ 3109 -VMOVAPDZmr 3110 -VMOVAPDZmrk 3111 -VMOVAPDZrm 3112 -VMOVAPDZrmk 3113 -VMOVAPDZrmkz 3114 -VMOVAPDZrr 3115 -VMOVAPDZrr_REV 3116 -VMOVAPDZrrk 3117 -VMOVAPDZrrk_REV 3118 -VMOVAPDZrrkz 3119 -VMOVAPDZrrkz_REV 3120 -VMOVAPDmr 3121 -VMOVAPDrm 3122 -VMOVAPDrr 3123 -VMOVAPDrr_REV 3124 -VMOVAPSYmr 3125 -VMOVAPSYrm 3126 -VMOVAPSYrr 3127 -VMOVAPSYrr_REV 3128 -VMOVAPSZ 3129 -VMOVAPSZmr 3130 -VMOVAPSZmrk 3131 -VMOVAPSZrm 3132 -VMOVAPSZrmk 3133 -VMOVAPSZrmkz 3134 -VMOVAPSZrr 3135 -VMOVAPSZrr_REV 3136 -VMOVAPSZrrk 3137 -VMOVAPSZrrk_REV 3138 -VMOVAPSZrrkz 3139 -VMOVAPSZrrkz_REV 3140 -VMOVAPSmr 3141 -VMOVAPSrm 3142 -VMOVAPSrr 3143 -VMOVAPSrr_REV 3144 -VMOVDDUPYrm 3145 -VMOVDDUPYrr 3146 -VMOVDDUPZ 3147 -VMOVDDUPZrm 3148 -VMOVDDUPZrmk 3149 -VMOVDDUPZrmkz 3150 -VMOVDDUPZrr 3151 -VMOVDDUPZrrk 3152 -VMOVDDUPZrrkz 3153 -VMOVDDUPrm 3154 -VMOVDDUPrr 3155 -VMOVDI 3156 -VMOVDQA 3157 -VMOVDQAYmr 3158 -VMOVDQAYrm 3159 -VMOVDQAYrr 3160 -VMOVDQAYrr_REV 3161 -VMOVDQAmr 3162 -VMOVDQArm 3163 -VMOVDQArr 3164 -VMOVDQArr_REV 3165 -VMOVDQU 3166 -VMOVDQUYmr 3167 -VMOVDQUYrm 3168 -VMOVDQUYrr 3169 -VMOVDQUYrr_REV 3170 -VMOVDQUmr 3171 -VMOVDQUrm 3172 -VMOVDQUrr 3173 -VMOVDQUrr_REV 3174 -VMOVHLPSZrr 3175 -VMOVHLPSrr 3176 -VMOVHPDZ 3177 -VMOVHPDmr 3178 -VMOVHPDrm 3179 -VMOVHPSZ 3180 -VMOVHPSmr 3181 -VMOVHPSrm 3182 -VMOVLHPSZrr 3183 -VMOVLHPSrr 3184 -VMOVLPDZ 3185 -VMOVLPDmr 3186 -VMOVLPDrm 3187 -VMOVLPSZ 3188 -VMOVLPSmr 3189 -VMOVLPSrm 3190 -VMOVMSKPDYrr 3191 -VMOVMSKPDrr 3192 -VMOVMSKPSYrr 3193 -VMOVMSKPSrr 3194 -VMOVNTDQAYrm 3195 -VMOVNTDQAZ 3196 -VMOVNTDQAZrm 3197 -VMOVNTDQArm 3198 -VMOVNTDQYmr 3199 -VMOVNTDQZ 3200 -VMOVNTDQZmr 3201 -VMOVNTDQmr 3202 -VMOVNTPDYmr 3203 -VMOVNTPDZ 3204 -VMOVNTPDZmr 3205 -VMOVNTPDmr 3206 -VMOVNTPSYmr 3207 -VMOVNTPSZ 3208 -VMOVNTPSZmr 3209 -VMOVNTPSmr 3210 -VMOVPDI 3211 -VMOVPQI 3212 -VMOVPQIto 3213 -VMOVQI 3214 -VMOVRSBZ 3215 -VMOVRSBZm 3216 -VMOVRSBZmk 3217 -VMOVRSBZmkz 3218 -VMOVRSDZ 3219 -VMOVRSDZm 3220 -VMOVRSDZmk 3221 -VMOVRSDZmkz 3222 -VMOVRSQZ 3223 -VMOVRSQZm 3224 -VMOVRSQZmk 3225 -VMOVRSQZmkz 3226 -VMOVRSWZ 3227 -VMOVRSWZm 3228 -VMOVRSWZmk 3229 -VMOVRSWZmkz 3230 -VMOVSDZmr 3231 -VMOVSDZmrk 
3232 -VMOVSDZrm 3233 -VMOVSDZrm_alt 3234 -VMOVSDZrmk 3235 -VMOVSDZrmkz 3236 -VMOVSDZrr 3237 -VMOVSDZrr_REV 3238 -VMOVSDZrrk 3239 -VMOVSDZrrk_REV 3240 -VMOVSDZrrkz 3241 -VMOVSDZrrkz_REV 3242 -VMOVSDmr 3243 -VMOVSDrm 3244 -VMOVSDrm_alt 3245 -VMOVSDrr 3246 -VMOVSDrr_REV 3247 -VMOVSDto 3248 -VMOVSH 3249 -VMOVSHDUPYrm 3250 -VMOVSHDUPYrr 3251 -VMOVSHDUPZ 3252 -VMOVSHDUPZrm 3253 -VMOVSHDUPZrmk 3254 -VMOVSHDUPZrmkz 3255 -VMOVSHDUPZrr 3256 -VMOVSHDUPZrrk 3257 -VMOVSHDUPZrrkz 3258 -VMOVSHDUPrm 3259 -VMOVSHDUPrr 3260 -VMOVSHZmr 3261 -VMOVSHZmrk 3262 -VMOVSHZrm 3263 -VMOVSHZrm_alt 3264 -VMOVSHZrmk 3265 -VMOVSHZrmkz 3266 -VMOVSHZrr 3267 -VMOVSHZrr_REV 3268 -VMOVSHZrrk 3269 -VMOVSHZrrk_REV 3270 -VMOVSHZrrkz 3271 -VMOVSHZrrkz_REV 3272 -VMOVSHtoW 3273 -VMOVSLDUPYrm 3274 -VMOVSLDUPYrr 3275 -VMOVSLDUPZ 3276 -VMOVSLDUPZrm 3277 -VMOVSLDUPZrmk 3278 -VMOVSLDUPZrmkz 3279 -VMOVSLDUPZrr 3280 -VMOVSLDUPZrrk 3281 -VMOVSLDUPZrrkz 3282 -VMOVSLDUPrm 3283 -VMOVSLDUPrr 3284 -VMOVSS 3285 -VMOVSSZmr 3286 -VMOVSSZmrk 3287 -VMOVSSZrm 3288 -VMOVSSZrm_alt 3289 -VMOVSSZrmk 3290 -VMOVSSZrmkz 3291 -VMOVSSZrr 3292 -VMOVSSZrr_REV 3293 -VMOVSSZrrk 3294 -VMOVSSZrrk_REV 3295 -VMOVSSZrrkz 3296 -VMOVSSZrrkz_REV 3297 -VMOVSSmr 3298 -VMOVSSrm 3299 -VMOVSSrm_alt 3300 -VMOVSSrr 3301 -VMOVSSrr_REV 3302 -VMOVUPDYmr 3303 -VMOVUPDYrm 3304 -VMOVUPDYrr 3305 -VMOVUPDYrr_REV 3306 -VMOVUPDZ 3307 -VMOVUPDZmr 3308 -VMOVUPDZmrk 3309 -VMOVUPDZrm 3310 -VMOVUPDZrmk 3311 -VMOVUPDZrmkz 3312 -VMOVUPDZrr 3313 -VMOVUPDZrr_REV 3314 -VMOVUPDZrrk 3315 -VMOVUPDZrrk_REV 3316 -VMOVUPDZrrkz 3317 -VMOVUPDZrrkz_REV 3318 -VMOVUPDmr 3319 -VMOVUPDrm 3320 -VMOVUPDrr 3321 -VMOVUPDrr_REV 3322 -VMOVUPSYmr 3323 -VMOVUPSYrm 3324 -VMOVUPSYrr 3325 -VMOVUPSYrr_REV 3326 -VMOVUPSZ 3327 -VMOVUPSZmr 3328 -VMOVUPSZmrk 3329 -VMOVUPSZrm 3330 -VMOVUPSZrmk 3331 -VMOVUPSZrmkz 3332 -VMOVUPSZrr 3333 -VMOVUPSZrr_REV 3334 -VMOVUPSZrrk 3335 -VMOVUPSZrrk_REV 3336 -VMOVUPSZrrkz 3337 -VMOVUPSZrrkz_REV 3338 -VMOVUPSmr 3339 -VMOVUPSrm 3340 -VMOVUPSrr 3341 -VMOVUPSrr_REV 3342 -VMOVW 3343 -VMOVWmr 3344 -VMOVWrm 3345 -VMOVZPDILo 3346 -VMOVZPQILo 3347 -VMOVZPWILo 3348 -VMPSADBWYrmi 3349 -VMPSADBWYrri 3350 -VMPSADBWZ 3351 -VMPSADBWZrmi 3352 -VMPSADBWZrmik 3353 -VMPSADBWZrmikz 3354 -VMPSADBWZrri 3355 -VMPSADBWZrrik 3356 -VMPSADBWZrrikz 3357 -VMPSADBWrmi 3358 -VMPSADBWrri 3359 -VMPTRLDm 3360 -VMPTRSTm 3361 -VMREAD 3362 -VMRESUME 3363 -VMRUN 3364 -VMSAVE 3365 -VMULBF 3366 -VMULPDYrm 3367 -VMULPDYrr 3368 -VMULPDZ 3369 -VMULPDZrm 3370 -VMULPDZrmb 3371 -VMULPDZrmbk 3372 -VMULPDZrmbkz 3373 -VMULPDZrmk 3374 -VMULPDZrmkz 3375 -VMULPDZrr 3376 -VMULPDZrrb 3377 -VMULPDZrrbk 3378 -VMULPDZrrbkz 3379 -VMULPDZrrk 3380 -VMULPDZrrkz 3381 -VMULPDrm 3382 -VMULPDrr 3383 -VMULPHZ 3384 -VMULPHZrm 3385 -VMULPHZrmb 3386 -VMULPHZrmbk 3387 -VMULPHZrmbkz 3388 -VMULPHZrmk 3389 -VMULPHZrmkz 3390 -VMULPHZrr 3391 -VMULPHZrrb 3392 -VMULPHZrrbk 3393 -VMULPHZrrbkz 3394 -VMULPHZrrk 3395 -VMULPHZrrkz 3396 -VMULPSYrm 3397 -VMULPSYrr 3398 -VMULPSZ 3399 -VMULPSZrm 3400 -VMULPSZrmb 3401 -VMULPSZrmbk 3402 -VMULPSZrmbkz 3403 -VMULPSZrmk 3404 -VMULPSZrmkz 3405 -VMULPSZrr 3406 -VMULPSZrrb 3407 -VMULPSZrrbk 3408 -VMULPSZrrbkz 3409 -VMULPSZrrk 3410 -VMULPSZrrkz 3411 -VMULPSrm 3412 -VMULPSrr 3413 -VMULSDZrm 3414 -VMULSDZrm_Int 3415 -VMULSDZrmk_Int 3416 -VMULSDZrmkz_Int 3417 -VMULSDZrr 3418 -VMULSDZrr_Int 3419 -VMULSDZrrb_Int 3420 -VMULSDZrrbk_Int 3421 -VMULSDZrrbkz_Int 3422 -VMULSDZrrk_Int 3423 -VMULSDZrrkz_Int 3424 -VMULSDrm 3425 -VMULSDrm_Int 3426 -VMULSDrr 3427 -VMULSDrr_Int 3428 -VMULSHZrm 3429 -VMULSHZrm_Int 3430 -VMULSHZrmk_Int 3431 
-VMULSHZrmkz_Int 3432 -VMULSHZrr 3433 -VMULSHZrr_Int 3434 -VMULSHZrrb_Int 3435 -VMULSHZrrbk_Int 3436 -VMULSHZrrbkz_Int 3437 -VMULSHZrrk_Int 3438 -VMULSHZrrkz_Int 3439 -VMULSSZrm 3440 -VMULSSZrm_Int 3441 -VMULSSZrmk_Int 3442 -VMULSSZrmkz_Int 3443 -VMULSSZrr 3444 -VMULSSZrr_Int 3445 -VMULSSZrrb_Int 3446 -VMULSSZrrbk_Int 3447 -VMULSSZrrbkz_Int 3448 -VMULSSZrrk_Int 3449 -VMULSSZrrkz_Int 3450 -VMULSSrm 3451 -VMULSSrm_Int 3452 -VMULSSrr 3453 -VMULSSrr_Int 3454 -VMWRITE 3455 -VMXOFF 3456 -VMXON 3457 -VORPDYrm 3458 -VORPDYrr 3459 -VORPDZ 3460 -VORPDZrm 3461 -VORPDZrmb 3462 -VORPDZrmbk 3463 -VORPDZrmbkz 3464 -VORPDZrmk 3465 -VORPDZrmkz 3466 -VORPDZrr 3467 -VORPDZrrk 3468 -VORPDZrrkz 3469 -VORPDrm 3470 -VORPDrr 3471 -VORPSYrm 3472 -VORPSYrr 3473 -VORPSZ 3474 -VORPSZrm 3475 -VORPSZrmb 3476 -VORPSZrmbk 3477 -VORPSZrmbkz 3478 -VORPSZrmk 3479 -VORPSZrmkz 3480 -VORPSZrr 3481 -VORPSZrrk 3482 -VORPSZrrkz 3483 -VORPSrm 3484 -VORPSrr 3485 -VP 3486 -VPABSBYrm 3487 -VPABSBYrr 3488 -VPABSBZ 3489 -VPABSBZrm 3490 -VPABSBZrmk 3491 -VPABSBZrmkz 3492 -VPABSBZrr 3493 -VPABSBZrrk 3494 -VPABSBZrrkz 3495 -VPABSBrm 3496 -VPABSBrr 3497 -VPABSDYrm 3498 -VPABSDYrr 3499 -VPABSDZ 3500 -VPABSDZrm 3501 -VPABSDZrmb 3502 -VPABSDZrmbk 3503 -VPABSDZrmbkz 3504 -VPABSDZrmk 3505 -VPABSDZrmkz 3506 -VPABSDZrr 3507 -VPABSDZrrk 3508 -VPABSDZrrkz 3509 -VPABSDrm 3510 -VPABSDrr 3511 -VPABSQZ 3512 -VPABSQZrm 3513 -VPABSQZrmb 3514 -VPABSQZrmbk 3515 -VPABSQZrmbkz 3516 -VPABSQZrmk 3517 -VPABSQZrmkz 3518 -VPABSQZrr 3519 -VPABSQZrrk 3520 -VPABSQZrrkz 3521 -VPABSWYrm 3522 -VPABSWYrr 3523 -VPABSWZ 3524 -VPABSWZrm 3525 -VPABSWZrmk 3526 -VPABSWZrmkz 3527 -VPABSWZrr 3528 -VPABSWZrrk 3529 -VPABSWZrrkz 3530 -VPABSWrm 3531 -VPABSWrr 3532 -VPACKSSDWYrm 3533 -VPACKSSDWYrr 3534 -VPACKSSDWZ 3535 -VPACKSSDWZrm 3536 -VPACKSSDWZrmb 3537 -VPACKSSDWZrmbk 3538 -VPACKSSDWZrmbkz 3539 -VPACKSSDWZrmk 3540 -VPACKSSDWZrmkz 3541 -VPACKSSDWZrr 3542 -VPACKSSDWZrrk 3543 -VPACKSSDWZrrkz 3544 -VPACKSSDWrm 3545 -VPACKSSDWrr 3546 -VPACKSSWBYrm 3547 -VPACKSSWBYrr 3548 -VPACKSSWBZ 3549 -VPACKSSWBZrm 3550 -VPACKSSWBZrmk 3551 -VPACKSSWBZrmkz 3552 -VPACKSSWBZrr 3553 -VPACKSSWBZrrk 3554 -VPACKSSWBZrrkz 3555 -VPACKSSWBrm 3556 -VPACKSSWBrr 3557 -VPACKUSDWYrm 3558 -VPACKUSDWYrr 3559 -VPACKUSDWZ 3560 -VPACKUSDWZrm 3561 -VPACKUSDWZrmb 3562 -VPACKUSDWZrmbk 3563 -VPACKUSDWZrmbkz 3564 -VPACKUSDWZrmk 3565 -VPACKUSDWZrmkz 3566 -VPACKUSDWZrr 3567 -VPACKUSDWZrrk 3568 -VPACKUSDWZrrkz 3569 -VPACKUSDWrm 3570 -VPACKUSDWrr 3571 -VPACKUSWBYrm 3572 -VPACKUSWBYrr 3573 -VPACKUSWBZ 3574 -VPACKUSWBZrm 3575 -VPACKUSWBZrmk 3576 -VPACKUSWBZrmkz 3577 -VPACKUSWBZrr 3578 -VPACKUSWBZrrk 3579 -VPACKUSWBZrrkz 3580 -VPACKUSWBrm 3581 -VPACKUSWBrr 3582 -VPADDBYrm 3583 -VPADDBYrr 3584 -VPADDBZ 3585 -VPADDBZrm 3586 -VPADDBZrmk 3587 -VPADDBZrmkz 3588 -VPADDBZrr 3589 -VPADDBZrrk 3590 -VPADDBZrrkz 3591 -VPADDBrm 3592 -VPADDBrr 3593 -VPADDDYrm 3594 -VPADDDYrr 3595 -VPADDDZ 3596 -VPADDDZrm 3597 -VPADDDZrmb 3598 -VPADDDZrmbk 3599 -VPADDDZrmbkz 3600 -VPADDDZrmk 3601 -VPADDDZrmkz 3602 -VPADDDZrr 3603 -VPADDDZrrk 3604 -VPADDDZrrkz 3605 -VPADDDrm 3606 -VPADDDrr 3607 -VPADDQYrm 3608 -VPADDQYrr 3609 -VPADDQZ 3610 -VPADDQZrm 3611 -VPADDQZrmb 3612 -VPADDQZrmbk 3613 -VPADDQZrmbkz 3614 -VPADDQZrmk 3615 -VPADDQZrmkz 3616 -VPADDQZrr 3617 -VPADDQZrrk 3618 -VPADDQZrrkz 3619 -VPADDQrm 3620 -VPADDQrr 3621 -VPADDSBYrm 3622 -VPADDSBYrr 3623 -VPADDSBZ 3624 -VPADDSBZrm 3625 -VPADDSBZrmk 3626 -VPADDSBZrmkz 3627 -VPADDSBZrr 3628 -VPADDSBZrrk 3629 -VPADDSBZrrkz 3630 -VPADDSBrm 3631 -VPADDSBrr 3632 -VPADDSWYrm 3633 -VPADDSWYrr 3634 -VPADDSWZ 3635 
-VPADDSWZrm 3636 -VPADDSWZrmk 3637 -VPADDSWZrmkz 3638 -VPADDSWZrr 3639 -VPADDSWZrrk 3640 -VPADDSWZrrkz 3641 -VPADDSWrm 3642 -VPADDSWrr 3643 -VPADDUSBYrm 3644 -VPADDUSBYrr 3645 -VPADDUSBZ 3646 -VPADDUSBZrm 3647 -VPADDUSBZrmk 3648 -VPADDUSBZrmkz 3649 -VPADDUSBZrr 3650 -VPADDUSBZrrk 3651 -VPADDUSBZrrkz 3652 -VPADDUSBrm 3653 -VPADDUSBrr 3654 -VPADDUSWYrm 3655 -VPADDUSWYrr 3656 -VPADDUSWZ 3657 -VPADDUSWZrm 3658 -VPADDUSWZrmk 3659 -VPADDUSWZrmkz 3660 -VPADDUSWZrr 3661 -VPADDUSWZrrk 3662 -VPADDUSWZrrkz 3663 -VPADDUSWrm 3664 -VPADDUSWrr 3665 -VPADDWYrm 3666 -VPADDWYrr 3667 -VPADDWZ 3668 -VPADDWZrm 3669 -VPADDWZrmk 3670 -VPADDWZrmkz 3671 -VPADDWZrr 3672 -VPADDWZrrk 3673 -VPADDWZrrkz 3674 -VPADDWrm 3675 -VPADDWrr 3676 -VPALIGNRYrmi 3677 -VPALIGNRYrri 3678 -VPALIGNRZ 3679 -VPALIGNRZrmi 3680 -VPALIGNRZrmik 3681 -VPALIGNRZrmikz 3682 -VPALIGNRZrri 3683 -VPALIGNRZrrik 3684 -VPALIGNRZrrikz 3685 -VPALIGNRrmi 3686 -VPALIGNRrri 3687 -VPANDDZ 3688 -VPANDDZrm 3689 -VPANDDZrmb 3690 -VPANDDZrmbk 3691 -VPANDDZrmbkz 3692 -VPANDDZrmk 3693 -VPANDDZrmkz 3694 -VPANDDZrr 3695 -VPANDDZrrk 3696 -VPANDDZrrkz 3697 -VPANDNDZ 3698 -VPANDNDZrm 3699 -VPANDNDZrmb 3700 -VPANDNDZrmbk 3701 -VPANDNDZrmbkz 3702 -VPANDNDZrmk 3703 -VPANDNDZrmkz 3704 -VPANDNDZrr 3705 -VPANDNDZrrk 3706 -VPANDNDZrrkz 3707 -VPANDNQZ 3708 -VPANDNQZrm 3709 -VPANDNQZrmb 3710 -VPANDNQZrmbk 3711 -VPANDNQZrmbkz 3712 -VPANDNQZrmk 3713 -VPANDNQZrmkz 3714 -VPANDNQZrr 3715 -VPANDNQZrrk 3716 -VPANDNQZrrkz 3717 -VPANDNYrm 3718 -VPANDNYrr 3719 -VPANDNrm 3720 -VPANDNrr 3721 -VPANDQZ 3722 -VPANDQZrm 3723 -VPANDQZrmb 3724 -VPANDQZrmbk 3725 -VPANDQZrmbkz 3726 -VPANDQZrmk 3727 -VPANDQZrmkz 3728 -VPANDQZrr 3729 -VPANDQZrrk 3730 -VPANDQZrrkz 3731 -VPANDYrm 3732 -VPANDYrr 3733 -VPANDrm 3734 -VPANDrr 3735 -VPAVGBYrm 3736 -VPAVGBYrr 3737 -VPAVGBZ 3738 -VPAVGBZrm 3739 -VPAVGBZrmk 3740 -VPAVGBZrmkz 3741 -VPAVGBZrr 3742 -VPAVGBZrrk 3743 -VPAVGBZrrkz 3744 -VPAVGBrm 3745 -VPAVGBrr 3746 -VPAVGWYrm 3747 -VPAVGWYrr 3748 -VPAVGWZ 3749 -VPAVGWZrm 3750 -VPAVGWZrmk 3751 -VPAVGWZrmkz 3752 -VPAVGWZrr 3753 -VPAVGWZrrk 3754 -VPAVGWZrrkz 3755 -VPAVGWrm 3756 -VPAVGWrr 3757 -VPBLENDDYrmi 3758 -VPBLENDDYrri 3759 -VPBLENDDrmi 3760 -VPBLENDDrri 3761 -VPBLENDMBZ 3762 -VPBLENDMBZrm 3763 -VPBLENDMBZrmk 3764 -VPBLENDMBZrmkz 3765 -VPBLENDMBZrr 3766 -VPBLENDMBZrrk 3767 -VPBLENDMBZrrkz 3768 -VPBLENDMDZ 3769 -VPBLENDMDZrm 3770 -VPBLENDMDZrmb 3771 -VPBLENDMDZrmbk 3772 -VPBLENDMDZrmbkz 3773 -VPBLENDMDZrmk 3774 -VPBLENDMDZrmkz 3775 -VPBLENDMDZrr 3776 -VPBLENDMDZrrk 3777 -VPBLENDMDZrrkz 3778 -VPBLENDMQZ 3779 -VPBLENDMQZrm 3780 -VPBLENDMQZrmb 3781 -VPBLENDMQZrmbk 3782 -VPBLENDMQZrmbkz 3783 -VPBLENDMQZrmk 3784 -VPBLENDMQZrmkz 3785 -VPBLENDMQZrr 3786 -VPBLENDMQZrrk 3787 -VPBLENDMQZrrkz 3788 -VPBLENDMWZ 3789 -VPBLENDMWZrm 3790 -VPBLENDMWZrmk 3791 -VPBLENDMWZrmkz 3792 -VPBLENDMWZrr 3793 -VPBLENDMWZrrk 3794 -VPBLENDMWZrrkz 3795 -VPBLENDVBYrmr 3796 -VPBLENDVBYrrr 3797 -VPBLENDVBrmr 3798 -VPBLENDVBrrr 3799 -VPBLENDWYrmi 3800 -VPBLENDWYrri 3801 -VPBLENDWrmi 3802 -VPBLENDWrri 3803 -VPBROADCASTBYrm 3804 -VPBROADCASTBYrr 3805 -VPBROADCASTBZ 3806 -VPBROADCASTBZrm 3807 -VPBROADCASTBZrmk 3808 -VPBROADCASTBZrmkz 3809 -VPBROADCASTBZrr 3810 -VPBROADCASTBZrrk 3811 -VPBROADCASTBZrrkz 3812 -VPBROADCASTBrZ 3813 -VPBROADCASTBrZrr 3814 -VPBROADCASTBrZrrk 3815 -VPBROADCASTBrZrrkz 3816 -VPBROADCASTBrm 3817 -VPBROADCASTBrr 3818 -VPBROADCASTDYrm 3819 -VPBROADCASTDYrr 3820 -VPBROADCASTDZ 3821 -VPBROADCASTDZrm 3822 -VPBROADCASTDZrmk 3823 -VPBROADCASTDZrmkz 3824 -VPBROADCASTDZrr 3825 -VPBROADCASTDZrrk 3826 -VPBROADCASTDZrrkz 3827 
-VPBROADCASTDrZ 3828 -VPBROADCASTDrZrr 3829 -VPBROADCASTDrZrrk 3830 -VPBROADCASTDrZrrkz 3831 -VPBROADCASTDrm 3832 -VPBROADCASTDrr 3833 -VPBROADCASTMB 3834 -VPBROADCASTMW 3835 -VPBROADCASTQYrm 3836 -VPBROADCASTQYrr 3837 -VPBROADCASTQZ 3838 -VPBROADCASTQZrm 3839 -VPBROADCASTQZrmk 3840 -VPBROADCASTQZrmkz 3841 -VPBROADCASTQZrr 3842 -VPBROADCASTQZrrk 3843 -VPBROADCASTQZrrkz 3844 -VPBROADCASTQrZ 3845 -VPBROADCASTQrZrr 3846 -VPBROADCASTQrZrrk 3847 -VPBROADCASTQrZrrkz 3848 -VPBROADCASTQrm 3849 -VPBROADCASTQrr 3850 -VPBROADCASTWYrm 3851 -VPBROADCASTWYrr 3852 -VPBROADCASTWZ 3853 -VPBROADCASTWZrm 3854 -VPBROADCASTWZrmk 3855 -VPBROADCASTWZrmkz 3856 -VPBROADCASTWZrr 3857 -VPBROADCASTWZrrk 3858 -VPBROADCASTWZrrkz 3859 -VPBROADCASTWrZ 3860 -VPBROADCASTWrZrr 3861 -VPBROADCASTWrZrrk 3862 -VPBROADCASTWrZrrkz 3863 -VPBROADCASTWrm 3864 -VPBROADCASTWrr 3865 -VPCLMULQDQYrmi 3866 -VPCLMULQDQYrri 3867 -VPCLMULQDQZ 3868 -VPCLMULQDQZrmi 3869 -VPCLMULQDQZrri 3870 -VPCLMULQDQrmi 3871 -VPCLMULQDQrri 3872 -VPCMOVYrmr 3873 -VPCMOVYrrm 3874 -VPCMOVYrrr 3875 -VPCMOVYrrr_REV 3876 -VPCMOVrmr 3877 -VPCMOVrrm 3878 -VPCMOVrrr 3879 -VPCMOVrrr_REV 3880 -VPCMPBZ 3881 -VPCMPBZrmi 3882 -VPCMPBZrmik 3883 -VPCMPBZrri 3884 -VPCMPBZrrik 3885 -VPCMPDZ 3886 -VPCMPDZrmbi 3887 -VPCMPDZrmbik 3888 -VPCMPDZrmi 3889 -VPCMPDZrmik 3890 -VPCMPDZrri 3891 -VPCMPDZrrik 3892 -VPCMPEQBYrm 3893 -VPCMPEQBYrr 3894 -VPCMPEQBZ 3895 -VPCMPEQBZrm 3896 -VPCMPEQBZrmk 3897 -VPCMPEQBZrr 3898 -VPCMPEQBZrrk 3899 -VPCMPEQBrm 3900 -VPCMPEQBrr 3901 -VPCMPEQDYrm 3902 -VPCMPEQDYrr 3903 -VPCMPEQDZ 3904 -VPCMPEQDZrm 3905 -VPCMPEQDZrmb 3906 -VPCMPEQDZrmbk 3907 -VPCMPEQDZrmk 3908 -VPCMPEQDZrr 3909 -VPCMPEQDZrrk 3910 -VPCMPEQDrm 3911 -VPCMPEQDrr 3912 -VPCMPEQQYrm 3913 -VPCMPEQQYrr 3914 -VPCMPEQQZ 3915 -VPCMPEQQZrm 3916 -VPCMPEQQZrmb 3917 -VPCMPEQQZrmbk 3918 -VPCMPEQQZrmk 3919 -VPCMPEQQZrr 3920 -VPCMPEQQZrrk 3921 -VPCMPEQQrm 3922 -VPCMPEQQrr 3923 -VPCMPEQWYrm 3924 -VPCMPEQWYrr 3925 -VPCMPEQWZ 3926 -VPCMPEQWZrm 3927 -VPCMPEQWZrmk 3928 -VPCMPEQWZrr 3929 -VPCMPEQWZrrk 3930 -VPCMPEQWrm 3931 -VPCMPEQWrr 3932 -VPCMPESTRIrmi 3933 -VPCMPESTRIrri 3934 -VPCMPESTRMrmi 3935 -VPCMPESTRMrri 3936 -VPCMPGTBYrm 3937 -VPCMPGTBYrr 3938 -VPCMPGTBZ 3939 -VPCMPGTBZrm 3940 -VPCMPGTBZrmk 3941 -VPCMPGTBZrr 3942 -VPCMPGTBZrrk 3943 -VPCMPGTBrm 3944 -VPCMPGTBrr 3945 -VPCMPGTDYrm 3946 -VPCMPGTDYrr 3947 -VPCMPGTDZ 3948 -VPCMPGTDZrm 3949 -VPCMPGTDZrmb 3950 -VPCMPGTDZrmbk 3951 -VPCMPGTDZrmk 3952 -VPCMPGTDZrr 3953 -VPCMPGTDZrrk 3954 -VPCMPGTDrm 3955 -VPCMPGTDrr 3956 -VPCMPGTQYrm 3957 -VPCMPGTQYrr 3958 -VPCMPGTQZ 3959 -VPCMPGTQZrm 3960 -VPCMPGTQZrmb 3961 -VPCMPGTQZrmbk 3962 -VPCMPGTQZrmk 3963 -VPCMPGTQZrr 3964 -VPCMPGTQZrrk 3965 -VPCMPGTQrm 3966 -VPCMPGTQrr 3967 -VPCMPGTWYrm 3968 -VPCMPGTWYrr 3969 -VPCMPGTWZ 3970 -VPCMPGTWZrm 3971 -VPCMPGTWZrmk 3972 -VPCMPGTWZrr 3973 -VPCMPGTWZrrk 3974 -VPCMPGTWrm 3975 -VPCMPGTWrr 3976 -VPCMPISTRIrmi 3977 -VPCMPISTRIrri 3978 -VPCMPISTRMrmi 3979 -VPCMPISTRMrri 3980 -VPCMPQZ 3981 -VPCMPQZrmbi 3982 -VPCMPQZrmbik 3983 -VPCMPQZrmi 3984 -VPCMPQZrmik 3985 -VPCMPQZrri 3986 -VPCMPQZrrik 3987 -VPCMPUBZ 3988 -VPCMPUBZrmi 3989 -VPCMPUBZrmik 3990 -VPCMPUBZrri 3991 -VPCMPUBZrrik 3992 -VPCMPUDZ 3993 -VPCMPUDZrmbi 3994 -VPCMPUDZrmbik 3995 -VPCMPUDZrmi 3996 -VPCMPUDZrmik 3997 -VPCMPUDZrri 3998 -VPCMPUDZrrik 3999 -VPCMPUQZ 4000 -VPCMPUQZrmbi 4001 -VPCMPUQZrmbik 4002 -VPCMPUQZrmi 4003 -VPCMPUQZrmik 4004 -VPCMPUQZrri 4005 -VPCMPUQZrrik 4006 -VPCMPUWZ 4007 -VPCMPUWZrmi 4008 -VPCMPUWZrmik 4009 -VPCMPUWZrri 4010 -VPCMPUWZrrik 4011 -VPCMPWZ 4012 -VPCMPWZrmi 4013 -VPCMPWZrmik 4014 -VPCMPWZrri 4015 
-VPCMPWZrrik 4016 -VPCOMBmi 4017 -VPCOMBri 4018 -VPCOMDmi 4019 -VPCOMDri 4020 -VPCOMPRESSBZ 4021 -VPCOMPRESSBZmr 4022 -VPCOMPRESSBZmrk 4023 -VPCOMPRESSBZrr 4024 -VPCOMPRESSBZrrk 4025 -VPCOMPRESSBZrrkz 4026 -VPCOMPRESSDZ 4027 -VPCOMPRESSDZmr 4028 -VPCOMPRESSDZmrk 4029 -VPCOMPRESSDZrr 4030 -VPCOMPRESSDZrrk 4031 -VPCOMPRESSDZrrkz 4032 -VPCOMPRESSQZ 4033 -VPCOMPRESSQZmr 4034 -VPCOMPRESSQZmrk 4035 -VPCOMPRESSQZrr 4036 -VPCOMPRESSQZrrk 4037 -VPCOMPRESSQZrrkz 4038 -VPCOMPRESSWZ 4039 -VPCOMPRESSWZmr 4040 -VPCOMPRESSWZmrk 4041 -VPCOMPRESSWZrr 4042 -VPCOMPRESSWZrrk 4043 -VPCOMPRESSWZrrkz 4044 -VPCOMQmi 4045 -VPCOMQri 4046 -VPCOMUBmi 4047 -VPCOMUBri 4048 -VPCOMUDmi 4049 -VPCOMUDri 4050 -VPCOMUQmi 4051 -VPCOMUQri 4052 -VPCOMUWmi 4053 -VPCOMUWri 4054 -VPCOMWmi 4055 -VPCOMWri 4056 -VPCONFLICTDZ 4057 -VPCONFLICTDZrm 4058 -VPCONFLICTDZrmb 4059 -VPCONFLICTDZrmbk 4060 -VPCONFLICTDZrmbkz 4061 -VPCONFLICTDZrmk 4062 -VPCONFLICTDZrmkz 4063 -VPCONFLICTDZrr 4064 -VPCONFLICTDZrrk 4065 -VPCONFLICTDZrrkz 4066 -VPCONFLICTQZ 4067 -VPCONFLICTQZrm 4068 -VPCONFLICTQZrmb 4069 -VPCONFLICTQZrmbk 4070 -VPCONFLICTQZrmbkz 4071 -VPCONFLICTQZrmk 4072 -VPCONFLICTQZrmkz 4073 -VPCONFLICTQZrr 4074 -VPCONFLICTQZrrk 4075 -VPCONFLICTQZrrkz 4076 -VPDPBSSDSYrm 4077 -VPDPBSSDSYrr 4078 -VPDPBSSDSZ 4079 -VPDPBSSDSZrm 4080 -VPDPBSSDSZrmb 4081 -VPDPBSSDSZrmbk 4082 -VPDPBSSDSZrmbkz 4083 -VPDPBSSDSZrmk 4084 -VPDPBSSDSZrmkz 4085 -VPDPBSSDSZrr 4086 -VPDPBSSDSZrrk 4087 -VPDPBSSDSZrrkz 4088 -VPDPBSSDSrm 4089 -VPDPBSSDSrr 4090 -VPDPBSSDYrm 4091 -VPDPBSSDYrr 4092 -VPDPBSSDZ 4093 -VPDPBSSDZrm 4094 -VPDPBSSDZrmb 4095 -VPDPBSSDZrmbk 4096 -VPDPBSSDZrmbkz 4097 -VPDPBSSDZrmk 4098 -VPDPBSSDZrmkz 4099 -VPDPBSSDZrr 4100 -VPDPBSSDZrrk 4101 -VPDPBSSDZrrkz 4102 -VPDPBSSDrm 4103 -VPDPBSSDrr 4104 -VPDPBSUDSYrm 4105 -VPDPBSUDSYrr 4106 -VPDPBSUDSZ 4107 -VPDPBSUDSZrm 4108 -VPDPBSUDSZrmb 4109 -VPDPBSUDSZrmbk 4110 -VPDPBSUDSZrmbkz 4111 -VPDPBSUDSZrmk 4112 -VPDPBSUDSZrmkz 4113 -VPDPBSUDSZrr 4114 -VPDPBSUDSZrrk 4115 -VPDPBSUDSZrrkz 4116 -VPDPBSUDSrm 4117 -VPDPBSUDSrr 4118 -VPDPBSUDYrm 4119 -VPDPBSUDYrr 4120 -VPDPBSUDZ 4121 -VPDPBSUDZrm 4122 -VPDPBSUDZrmb 4123 -VPDPBSUDZrmbk 4124 -VPDPBSUDZrmbkz 4125 -VPDPBSUDZrmk 4126 -VPDPBSUDZrmkz 4127 -VPDPBSUDZrr 4128 -VPDPBSUDZrrk 4129 -VPDPBSUDZrrkz 4130 -VPDPBSUDrm 4131 -VPDPBSUDrr 4132 -VPDPBUSDSYrm 4133 -VPDPBUSDSYrr 4134 -VPDPBUSDSZ 4135 -VPDPBUSDSZrm 4136 -VPDPBUSDSZrmb 4137 -VPDPBUSDSZrmbk 4138 -VPDPBUSDSZrmbkz 4139 -VPDPBUSDSZrmk 4140 -VPDPBUSDSZrmkz 4141 -VPDPBUSDSZrr 4142 -VPDPBUSDSZrrk 4143 -VPDPBUSDSZrrkz 4144 -VPDPBUSDSrm 4145 -VPDPBUSDSrr 4146 -VPDPBUSDYrm 4147 -VPDPBUSDYrr 4148 -VPDPBUSDZ 4149 -VPDPBUSDZrm 4150 -VPDPBUSDZrmb 4151 -VPDPBUSDZrmbk 4152 -VPDPBUSDZrmbkz 4153 -VPDPBUSDZrmk 4154 -VPDPBUSDZrmkz 4155 -VPDPBUSDZrr 4156 -VPDPBUSDZrrk 4157 -VPDPBUSDZrrkz 4158 -VPDPBUSDrm 4159 -VPDPBUSDrr 4160 -VPDPBUUDSYrm 4161 -VPDPBUUDSYrr 4162 -VPDPBUUDSZ 4163 -VPDPBUUDSZrm 4164 -VPDPBUUDSZrmb 4165 -VPDPBUUDSZrmbk 4166 -VPDPBUUDSZrmbkz 4167 -VPDPBUUDSZrmk 4168 -VPDPBUUDSZrmkz 4169 -VPDPBUUDSZrr 4170 -VPDPBUUDSZrrk 4171 -VPDPBUUDSZrrkz 4172 -VPDPBUUDSrm 4173 -VPDPBUUDSrr 4174 -VPDPBUUDYrm 4175 -VPDPBUUDYrr 4176 -VPDPBUUDZ 4177 -VPDPBUUDZrm 4178 -VPDPBUUDZrmb 4179 -VPDPBUUDZrmbk 4180 -VPDPBUUDZrmbkz 4181 -VPDPBUUDZrmk 4182 -VPDPBUUDZrmkz 4183 -VPDPBUUDZrr 4184 -VPDPBUUDZrrk 4185 -VPDPBUUDZrrkz 4186 -VPDPBUUDrm 4187 -VPDPBUUDrr 4188 -VPDPWSSDSYrm 4189 -VPDPWSSDSYrr 4190 -VPDPWSSDSZ 4191 -VPDPWSSDSZrm 4192 -VPDPWSSDSZrmb 4193 -VPDPWSSDSZrmbk 4194 -VPDPWSSDSZrmbkz 4195 -VPDPWSSDSZrmk 4196 -VPDPWSSDSZrmkz 4197 -VPDPWSSDSZrr 4198 
-VPDPWSSDSZrrk 4199 -VPDPWSSDSZrrkz 4200 -VPDPWSSDSrm 4201 -VPDPWSSDSrr 4202 -VPDPWSSDYrm 4203 -VPDPWSSDYrr 4204 -VPDPWSSDZ 4205 -VPDPWSSDZrm 4206 -VPDPWSSDZrmb 4207 -VPDPWSSDZrmbk 4208 -VPDPWSSDZrmbkz 4209 -VPDPWSSDZrmk 4210 -VPDPWSSDZrmkz 4211 -VPDPWSSDZrr 4212 -VPDPWSSDZrrk 4213 -VPDPWSSDZrrkz 4214 -VPDPWSSDrm 4215 -VPDPWSSDrr 4216 -VPDPWSUDSYrm 4217 -VPDPWSUDSYrr 4218 -VPDPWSUDSZ 4219 -VPDPWSUDSZrm 4220 -VPDPWSUDSZrmb 4221 -VPDPWSUDSZrmbk 4222 -VPDPWSUDSZrmbkz 4223 -VPDPWSUDSZrmk 4224 -VPDPWSUDSZrmkz 4225 -VPDPWSUDSZrr 4226 -VPDPWSUDSZrrk 4227 -VPDPWSUDSZrrkz 4228 -VPDPWSUDSrm 4229 -VPDPWSUDSrr 4230 -VPDPWSUDYrm 4231 -VPDPWSUDYrr 4232 -VPDPWSUDZ 4233 -VPDPWSUDZrm 4234 -VPDPWSUDZrmb 4235 -VPDPWSUDZrmbk 4236 -VPDPWSUDZrmbkz 4237 -VPDPWSUDZrmk 4238 -VPDPWSUDZrmkz 4239 -VPDPWSUDZrr 4240 -VPDPWSUDZrrk 4241 -VPDPWSUDZrrkz 4242 -VPDPWSUDrm 4243 -VPDPWSUDrr 4244 -VPDPWUSDSYrm 4245 -VPDPWUSDSYrr 4246 -VPDPWUSDSZ 4247 -VPDPWUSDSZrm 4248 -VPDPWUSDSZrmb 4249 -VPDPWUSDSZrmbk 4250 -VPDPWUSDSZrmbkz 4251 -VPDPWUSDSZrmk 4252 -VPDPWUSDSZrmkz 4253 -VPDPWUSDSZrr 4254 -VPDPWUSDSZrrk 4255 -VPDPWUSDSZrrkz 4256 -VPDPWUSDSrm 4257 -VPDPWUSDSrr 4258 -VPDPWUSDYrm 4259 -VPDPWUSDYrr 4260 -VPDPWUSDZ 4261 -VPDPWUSDZrm 4262 -VPDPWUSDZrmb 4263 -VPDPWUSDZrmbk 4264 -VPDPWUSDZrmbkz 4265 -VPDPWUSDZrmk 4266 -VPDPWUSDZrmkz 4267 -VPDPWUSDZrr 4268 -VPDPWUSDZrrk 4269 -VPDPWUSDZrrkz 4270 -VPDPWUSDrm 4271 -VPDPWUSDrr 4272 -VPDPWUUDSYrm 4273 -VPDPWUUDSYrr 4274 -VPDPWUUDSZ 4275 -VPDPWUUDSZrm 4276 -VPDPWUUDSZrmb 4277 -VPDPWUUDSZrmbk 4278 -VPDPWUUDSZrmbkz 4279 -VPDPWUUDSZrmk 4280 -VPDPWUUDSZrmkz 4281 -VPDPWUUDSZrr 4282 -VPDPWUUDSZrrk 4283 -VPDPWUUDSZrrkz 4284 -VPDPWUUDSrm 4285 -VPDPWUUDSrr 4286 -VPDPWUUDYrm 4287 -VPDPWUUDYrr 4288 -VPDPWUUDZ 4289 -VPDPWUUDZrm 4290 -VPDPWUUDZrmb 4291 -VPDPWUUDZrmbk 4292 -VPDPWUUDZrmbkz 4293 -VPDPWUUDZrmk 4294 -VPDPWUUDZrmkz 4295 -VPDPWUUDZrr 4296 -VPDPWUUDZrrk 4297 -VPDPWUUDZrrkz 4298 -VPDPWUUDrm 4299 -VPDPWUUDrr 4300 -VPERM 4301 -VPERMBZ 4302 -VPERMBZrm 4303 -VPERMBZrmk 4304 -VPERMBZrmkz 4305 -VPERMBZrr 4306 -VPERMBZrrk 4307 -VPERMBZrrkz 4308 -VPERMDYrm 4309 -VPERMDYrr 4310 -VPERMDZ 4311 -VPERMDZrm 4312 -VPERMDZrmb 4313 -VPERMDZrmbk 4314 -VPERMDZrmbkz 4315 -VPERMDZrmk 4316 -VPERMDZrmkz 4317 -VPERMDZrr 4318 -VPERMDZrrk 4319 -VPERMDZrrkz 4320 -VPERMI 4321 -VPERMIL 4322 -VPERMILPDYmi 4323 -VPERMILPDYri 4324 -VPERMILPDYrm 4325 -VPERMILPDYrr 4326 -VPERMILPDZ 4327 -VPERMILPDZmbi 4328 -VPERMILPDZmbik 4329 -VPERMILPDZmbikz 4330 -VPERMILPDZmi 4331 -VPERMILPDZmik 4332 -VPERMILPDZmikz 4333 -VPERMILPDZri 4334 -VPERMILPDZrik 4335 -VPERMILPDZrikz 4336 -VPERMILPDZrm 4337 -VPERMILPDZrmb 4338 -VPERMILPDZrmbk 4339 -VPERMILPDZrmbkz 4340 -VPERMILPDZrmk 4341 -VPERMILPDZrmkz 4342 -VPERMILPDZrr 4343 -VPERMILPDZrrk 4344 -VPERMILPDZrrkz 4345 -VPERMILPDmi 4346 -VPERMILPDri 4347 -VPERMILPDrm 4348 -VPERMILPDrr 4349 -VPERMILPSYmi 4350 -VPERMILPSYri 4351 -VPERMILPSYrm 4352 -VPERMILPSYrr 4353 -VPERMILPSZ 4354 -VPERMILPSZmbi 4355 -VPERMILPSZmbik 4356 -VPERMILPSZmbikz 4357 -VPERMILPSZmi 4358 -VPERMILPSZmik 4359 -VPERMILPSZmikz 4360 -VPERMILPSZri 4361 -VPERMILPSZrik 4362 -VPERMILPSZrikz 4363 -VPERMILPSZrm 4364 -VPERMILPSZrmb 4365 -VPERMILPSZrmbk 4366 -VPERMILPSZrmbkz 4367 -VPERMILPSZrmk 4368 -VPERMILPSZrmkz 4369 -VPERMILPSZrr 4370 -VPERMILPSZrrk 4371 -VPERMILPSZrrkz 4372 -VPERMILPSmi 4373 -VPERMILPSri 4374 -VPERMILPSrm 4375 -VPERMILPSrr 4376 -VPERMPDYmi 4377 -VPERMPDYri 4378 -VPERMPDZ 4379 -VPERMPDZmbi 4380 -VPERMPDZmbik 4381 -VPERMPDZmbikz 4382 -VPERMPDZmi 4383 -VPERMPDZmik 4384 -VPERMPDZmikz 4385 -VPERMPDZri 4386 -VPERMPDZrik 4387 
-VPERMPDZrikz 4388 -VPERMPDZrm 4389 -VPERMPDZrmb 4390 -VPERMPDZrmbk 4391 -VPERMPDZrmbkz 4392 -VPERMPDZrmk 4393 -VPERMPDZrmkz 4394 -VPERMPDZrr 4395 -VPERMPDZrrk 4396 -VPERMPDZrrkz 4397 -VPERMPSYrm 4398 -VPERMPSYrr 4399 -VPERMPSZ 4400 -VPERMPSZrm 4401 -VPERMPSZrmb 4402 -VPERMPSZrmbk 4403 -VPERMPSZrmbkz 4404 -VPERMPSZrmk 4405 -VPERMPSZrmkz 4406 -VPERMPSZrr 4407 -VPERMPSZrrk 4408 -VPERMPSZrrkz 4409 -VPERMQYmi 4410 -VPERMQYri 4411 -VPERMQZ 4412 -VPERMQZmbi 4413 -VPERMQZmbik 4414 -VPERMQZmbikz 4415 -VPERMQZmi 4416 -VPERMQZmik 4417 -VPERMQZmikz 4418 -VPERMQZri 4419 -VPERMQZrik 4420 -VPERMQZrikz 4421 -VPERMQZrm 4422 -VPERMQZrmb 4423 -VPERMQZrmbk 4424 -VPERMQZrmbkz 4425 -VPERMQZrmk 4426 -VPERMQZrmkz 4427 -VPERMQZrr 4428 -VPERMQZrrk 4429 -VPERMQZrrkz 4430 -VPERMT 4431 -VPERMWZ 4432 -VPERMWZrm 4433 -VPERMWZrmk 4434 -VPERMWZrmkz 4435 -VPERMWZrr 4436 -VPERMWZrrk 4437 -VPERMWZrrkz 4438 -VPEXPANDBZ 4439 -VPEXPANDBZrm 4440 -VPEXPANDBZrmk 4441 -VPEXPANDBZrmkz 4442 -VPEXPANDBZrr 4443 -VPEXPANDBZrrk 4444 -VPEXPANDBZrrkz 4445 -VPEXPANDDZ 4446 -VPEXPANDDZrm 4447 -VPEXPANDDZrmk 4448 -VPEXPANDDZrmkz 4449 -VPEXPANDDZrr 4450 -VPEXPANDDZrrk 4451 -VPEXPANDDZrrkz 4452 -VPEXPANDQZ 4453 -VPEXPANDQZrm 4454 -VPEXPANDQZrmk 4455 -VPEXPANDQZrmkz 4456 -VPEXPANDQZrr 4457 -VPEXPANDQZrrk 4458 -VPEXPANDQZrrkz 4459 -VPEXPANDWZ 4460 -VPEXPANDWZrm 4461 -VPEXPANDWZrmk 4462 -VPEXPANDWZrmkz 4463 -VPEXPANDWZrr 4464 -VPEXPANDWZrrk 4465 -VPEXPANDWZrrkz 4466 -VPEXTRBZmri 4467 -VPEXTRBZrri 4468 -VPEXTRBmri 4469 -VPEXTRBrri 4470 -VPEXTRDZmri 4471 -VPEXTRDZrri 4472 -VPEXTRDmri 4473 -VPEXTRDrri 4474 -VPEXTRQZmri 4475 -VPEXTRQZrri 4476 -VPEXTRQmri 4477 -VPEXTRQrri 4478 -VPEXTRWZmri 4479 -VPEXTRWZrri 4480 -VPEXTRWZrri_REV 4481 -VPEXTRWmri 4482 -VPEXTRWrri 4483 -VPEXTRWrri_REV 4484 -VPGATHERDDYrm 4485 -VPGATHERDDZ 4486 -VPGATHERDDZrm 4487 -VPGATHERDDrm 4488 -VPGATHERDQYrm 4489 -VPGATHERDQZ 4490 -VPGATHERDQZrm 4491 -VPGATHERDQrm 4492 -VPGATHERQDYrm 4493 -VPGATHERQDZ 4494 -VPGATHERQDZrm 4495 -VPGATHERQDrm 4496 -VPGATHERQQYrm 4497 -VPGATHERQQZ 4498 -VPGATHERQQZrm 4499 -VPGATHERQQrm 4500 -VPHADDBDrm 4501 -VPHADDBDrr 4502 -VPHADDBQrm 4503 -VPHADDBQrr 4504 -VPHADDBWrm 4505 -VPHADDBWrr 4506 -VPHADDDQrm 4507 -VPHADDDQrr 4508 -VPHADDDYrm 4509 -VPHADDDYrr 4510 -VPHADDDrm 4511 -VPHADDDrr 4512 -VPHADDSWYrm 4513 -VPHADDSWYrr 4514 -VPHADDSWrm 4515 -VPHADDSWrr 4516 -VPHADDUBDrm 4517 -VPHADDUBDrr 4518 -VPHADDUBQrm 4519 -VPHADDUBQrr 4520 -VPHADDUBWrm 4521 -VPHADDUBWrr 4522 -VPHADDUDQrm 4523 -VPHADDUDQrr 4524 -VPHADDUWDrm 4525 -VPHADDUWDrr 4526 -VPHADDUWQrm 4527 -VPHADDUWQrr 4528 -VPHADDWDrm 4529 -VPHADDWDrr 4530 -VPHADDWQrm 4531 -VPHADDWQrr 4532 -VPHADDWYrm 4533 -VPHADDWYrr 4534 -VPHADDWrm 4535 -VPHADDWrr 4536 -VPHMINPOSUWrm 4537 -VPHMINPOSUWrr 4538 -VPHSUBBWrm 4539 -VPHSUBBWrr 4540 -VPHSUBDQrm 4541 -VPHSUBDQrr 4542 -VPHSUBDYrm 4543 -VPHSUBDYrr 4544 -VPHSUBDrm 4545 -VPHSUBDrr 4546 -VPHSUBSWYrm 4547 -VPHSUBSWYrr 4548 -VPHSUBSWrm 4549 -VPHSUBSWrr 4550 -VPHSUBWDrm 4551 -VPHSUBWDrr 4552 -VPHSUBWYrm 4553 -VPHSUBWYrr 4554 -VPHSUBWrm 4555 -VPHSUBWrr 4556 -VPINSRBZrmi 4557 -VPINSRBZrri 4558 -VPINSRBrmi 4559 -VPINSRBrri 4560 -VPINSRDZrmi 4561 -VPINSRDZrri 4562 -VPINSRDrmi 4563 -VPINSRDrri 4564 -VPINSRQZrmi 4565 -VPINSRQZrri 4566 -VPINSRQrmi 4567 -VPINSRQrri 4568 -VPINSRWZrmi 4569 -VPINSRWZrri 4570 -VPINSRWrmi 4571 -VPINSRWrri 4572 -VPLZCNTDZ 4573 -VPLZCNTDZrm 4574 -VPLZCNTDZrmb 4575 -VPLZCNTDZrmbk 4576 -VPLZCNTDZrmbkz 4577 -VPLZCNTDZrmk 4578 -VPLZCNTDZrmkz 4579 -VPLZCNTDZrr 4580 -VPLZCNTDZrrk 4581 -VPLZCNTDZrrkz 4582 -VPLZCNTQZ 4583 -VPLZCNTQZrm 4584 -VPLZCNTQZrmb 4585 
-VPLZCNTQZrmbk 4586 -VPLZCNTQZrmbkz 4587 -VPLZCNTQZrmk 4588 -VPLZCNTQZrmkz 4589 -VPLZCNTQZrr 4590 -VPLZCNTQZrrk 4591 -VPLZCNTQZrrkz 4592 -VPMACSDDrm 4593 -VPMACSDDrr 4594 -VPMACSDQHrm 4595 -VPMACSDQHrr 4596 -VPMACSDQLrm 4597 -VPMACSDQLrr 4598 -VPMACSSDDrm 4599 -VPMACSSDDrr 4600 -VPMACSSDQHrm 4601 -VPMACSSDQHrr 4602 -VPMACSSDQLrm 4603 -VPMACSSDQLrr 4604 -VPMACSSWDrm 4605 -VPMACSSWDrr 4606 -VPMACSSWWrm 4607 -VPMACSSWWrr 4608 -VPMACSWDrm 4609 -VPMACSWDrr 4610 -VPMACSWWrm 4611 -VPMACSWWrr 4612 -VPMADCSSWDrm 4613 -VPMADCSSWDrr 4614 -VPMADCSWDrm 4615 -VPMADCSWDrr 4616 -VPMADD 4617 -VPMADDUBSWYrm 4618 -VPMADDUBSWYrr 4619 -VPMADDUBSWZ 4620 -VPMADDUBSWZrm 4621 -VPMADDUBSWZrmk 4622 -VPMADDUBSWZrmkz 4623 -VPMADDUBSWZrr 4624 -VPMADDUBSWZrrk 4625 -VPMADDUBSWZrrkz 4626 -VPMADDUBSWrm 4627 -VPMADDUBSWrr 4628 -VPMADDWDYrm 4629 -VPMADDWDYrr 4630 -VPMADDWDZ 4631 -VPMADDWDZrm 4632 -VPMADDWDZrmk 4633 -VPMADDWDZrmkz 4634 -VPMADDWDZrr 4635 -VPMADDWDZrrk 4636 -VPMADDWDZrrkz 4637 -VPMADDWDrm 4638 -VPMADDWDrr 4639 -VPMASKMOVDYmr 4640 -VPMASKMOVDYrm 4641 -VPMASKMOVDmr 4642 -VPMASKMOVDrm 4643 -VPMASKMOVQYmr 4644 -VPMASKMOVQYrm 4645 -VPMASKMOVQmr 4646 -VPMASKMOVQrm 4647 -VPMAXSBYrm 4648 -VPMAXSBYrr 4649 -VPMAXSBZ 4650 -VPMAXSBZrm 4651 -VPMAXSBZrmk 4652 -VPMAXSBZrmkz 4653 -VPMAXSBZrr 4654 -VPMAXSBZrrk 4655 -VPMAXSBZrrkz 4656 -VPMAXSBrm 4657 -VPMAXSBrr 4658 -VPMAXSDYrm 4659 -VPMAXSDYrr 4660 -VPMAXSDZ 4661 -VPMAXSDZrm 4662 -VPMAXSDZrmb 4663 -VPMAXSDZrmbk 4664 -VPMAXSDZrmbkz 4665 -VPMAXSDZrmk 4666 -VPMAXSDZrmkz 4667 -VPMAXSDZrr 4668 -VPMAXSDZrrk 4669 -VPMAXSDZrrkz 4670 -VPMAXSDrm 4671 -VPMAXSDrr 4672 -VPMAXSQZ 4673 -VPMAXSQZrm 4674 -VPMAXSQZrmb 4675 -VPMAXSQZrmbk 4676 -VPMAXSQZrmbkz 4677 -VPMAXSQZrmk 4678 -VPMAXSQZrmkz 4679 -VPMAXSQZrr 4680 -VPMAXSQZrrk 4681 -VPMAXSQZrrkz 4682 -VPMAXSWYrm 4683 -VPMAXSWYrr 4684 -VPMAXSWZ 4685 -VPMAXSWZrm 4686 -VPMAXSWZrmk 4687 -VPMAXSWZrmkz 4688 -VPMAXSWZrr 4689 -VPMAXSWZrrk 4690 -VPMAXSWZrrkz 4691 -VPMAXSWrm 4692 -VPMAXSWrr 4693 -VPMAXUBYrm 4694 -VPMAXUBYrr 4695 -VPMAXUBZ 4696 -VPMAXUBZrm 4697 -VPMAXUBZrmk 4698 -VPMAXUBZrmkz 4699 -VPMAXUBZrr 4700 -VPMAXUBZrrk 4701 -VPMAXUBZrrkz 4702 -VPMAXUBrm 4703 -VPMAXUBrr 4704 -VPMAXUDYrm 4705 -VPMAXUDYrr 4706 -VPMAXUDZ 4707 -VPMAXUDZrm 4708 -VPMAXUDZrmb 4709 -VPMAXUDZrmbk 4710 -VPMAXUDZrmbkz 4711 -VPMAXUDZrmk 4712 -VPMAXUDZrmkz 4713 -VPMAXUDZrr 4714 -VPMAXUDZrrk 4715 -VPMAXUDZrrkz 4716 -VPMAXUDrm 4717 -VPMAXUDrr 4718 -VPMAXUQZ 4719 -VPMAXUQZrm 4720 -VPMAXUQZrmb 4721 -VPMAXUQZrmbk 4722 -VPMAXUQZrmbkz 4723 -VPMAXUQZrmk 4724 -VPMAXUQZrmkz 4725 -VPMAXUQZrr 4726 -VPMAXUQZrrk 4727 -VPMAXUQZrrkz 4728 -VPMAXUWYrm 4729 -VPMAXUWYrr 4730 -VPMAXUWZ 4731 -VPMAXUWZrm 4732 -VPMAXUWZrmk 4733 -VPMAXUWZrmkz 4734 -VPMAXUWZrr 4735 -VPMAXUWZrrk 4736 -VPMAXUWZrrkz 4737 -VPMAXUWrm 4738 -VPMAXUWrr 4739 -VPMINSBYrm 4740 -VPMINSBYrr 4741 -VPMINSBZ 4742 -VPMINSBZrm 4743 -VPMINSBZrmk 4744 -VPMINSBZrmkz 4745 -VPMINSBZrr 4746 -VPMINSBZrrk 4747 -VPMINSBZrrkz 4748 -VPMINSBrm 4749 -VPMINSBrr 4750 -VPMINSDYrm 4751 -VPMINSDYrr 4752 -VPMINSDZ 4753 -VPMINSDZrm 4754 -VPMINSDZrmb 4755 -VPMINSDZrmbk 4756 -VPMINSDZrmbkz 4757 -VPMINSDZrmk 4758 -VPMINSDZrmkz 4759 -VPMINSDZrr 4760 -VPMINSDZrrk 4761 -VPMINSDZrrkz 4762 -VPMINSDrm 4763 -VPMINSDrr 4764 -VPMINSQZ 4765 -VPMINSQZrm 4766 -VPMINSQZrmb 4767 -VPMINSQZrmbk 4768 -VPMINSQZrmbkz 4769 -VPMINSQZrmk 4770 -VPMINSQZrmkz 4771 -VPMINSQZrr 4772 -VPMINSQZrrk 4773 -VPMINSQZrrkz 4774 -VPMINSWYrm 4775 -VPMINSWYrr 4776 -VPMINSWZ 4777 -VPMINSWZrm 4778 -VPMINSWZrmk 4779 -VPMINSWZrmkz 4780 -VPMINSWZrr 4781 -VPMINSWZrrk 4782 -VPMINSWZrrkz 4783 -VPMINSWrm 
4784 -VPMINSWrr 4785 -VPMINUBYrm 4786 -VPMINUBYrr 4787 -VPMINUBZ 4788 -VPMINUBZrm 4789 -VPMINUBZrmk 4790 -VPMINUBZrmkz 4791 -VPMINUBZrr 4792 -VPMINUBZrrk 4793 -VPMINUBZrrkz 4794 -VPMINUBrm 4795 -VPMINUBrr 4796 -VPMINUDYrm 4797 -VPMINUDYrr 4798 -VPMINUDZ 4799 -VPMINUDZrm 4800 -VPMINUDZrmb 4801 -VPMINUDZrmbk 4802 -VPMINUDZrmbkz 4803 -VPMINUDZrmk 4804 -VPMINUDZrmkz 4805 -VPMINUDZrr 4806 -VPMINUDZrrk 4807 -VPMINUDZrrkz 4808 -VPMINUDrm 4809 -VPMINUDrr 4810 -VPMINUQZ 4811 -VPMINUQZrm 4812 -VPMINUQZrmb 4813 -VPMINUQZrmbk 4814 -VPMINUQZrmbkz 4815 -VPMINUQZrmk 4816 -VPMINUQZrmkz 4817 -VPMINUQZrr 4818 -VPMINUQZrrk 4819 -VPMINUQZrrkz 4820 -VPMINUWYrm 4821 -VPMINUWYrr 4822 -VPMINUWZ 4823 -VPMINUWZrm 4824 -VPMINUWZrmk 4825 -VPMINUWZrmkz 4826 -VPMINUWZrr 4827 -VPMINUWZrrk 4828 -VPMINUWZrrkz 4829 -VPMINUWrm 4830 -VPMINUWrr 4831 -VPMOVB 4832 -VPMOVD 4833 -VPMOVDBZ 4834 -VPMOVDBZmr 4835 -VPMOVDBZmrk 4836 -VPMOVDBZrr 4837 -VPMOVDBZrrk 4838 -VPMOVDBZrrkz 4839 -VPMOVDWZ 4840 -VPMOVDWZmr 4841 -VPMOVDWZmrk 4842 -VPMOVDWZrr 4843 -VPMOVDWZrrk 4844 -VPMOVDWZrrkz 4845 -VPMOVM 4846 -VPMOVMSKBYrr 4847 -VPMOVMSKBrr 4848 -VPMOVQ 4849 -VPMOVQBZ 4850 -VPMOVQBZmr 4851 -VPMOVQBZmrk 4852 -VPMOVQBZrr 4853 -VPMOVQBZrrk 4854 -VPMOVQBZrrkz 4855 -VPMOVQDZ 4856 -VPMOVQDZmr 4857 -VPMOVQDZmrk 4858 -VPMOVQDZrr 4859 -VPMOVQDZrrk 4860 -VPMOVQDZrrkz 4861 -VPMOVQWZ 4862 -VPMOVQWZmr 4863 -VPMOVQWZmrk 4864 -VPMOVQWZrr 4865 -VPMOVQWZrrk 4866 -VPMOVQWZrrkz 4867 -VPMOVSDBZ 4868 -VPMOVSDBZmr 4869 -VPMOVSDBZmrk 4870 -VPMOVSDBZrr 4871 -VPMOVSDBZrrk 4872 -VPMOVSDBZrrkz 4873 -VPMOVSDWZ 4874 -VPMOVSDWZmr 4875 -VPMOVSDWZmrk 4876 -VPMOVSDWZrr 4877 -VPMOVSDWZrrk 4878 -VPMOVSDWZrrkz 4879 -VPMOVSQBZ 4880 -VPMOVSQBZmr 4881 -VPMOVSQBZmrk 4882 -VPMOVSQBZrr 4883 -VPMOVSQBZrrk 4884 -VPMOVSQBZrrkz 4885 -VPMOVSQDZ 4886 -VPMOVSQDZmr 4887 -VPMOVSQDZmrk 4888 -VPMOVSQDZrr 4889 -VPMOVSQDZrrk 4890 -VPMOVSQDZrrkz 4891 -VPMOVSQWZ 4892 -VPMOVSQWZmr 4893 -VPMOVSQWZmrk 4894 -VPMOVSQWZrr 4895 -VPMOVSQWZrrk 4896 -VPMOVSQWZrrkz 4897 -VPMOVSWBZ 4898 -VPMOVSWBZmr 4899 -VPMOVSWBZmrk 4900 -VPMOVSWBZrr 4901 -VPMOVSWBZrrk 4902 -VPMOVSWBZrrkz 4903 -VPMOVSXBDYrm 4904 -VPMOVSXBDYrr 4905 -VPMOVSXBDZ 4906 -VPMOVSXBDZrm 4907 -VPMOVSXBDZrmk 4908 -VPMOVSXBDZrmkz 4909 -VPMOVSXBDZrr 4910 -VPMOVSXBDZrrk 4911 -VPMOVSXBDZrrkz 4912 -VPMOVSXBDrm 4913 -VPMOVSXBDrr 4914 -VPMOVSXBQYrm 4915 -VPMOVSXBQYrr 4916 -VPMOVSXBQZ 4917 -VPMOVSXBQZrm 4918 -VPMOVSXBQZrmk 4919 -VPMOVSXBQZrmkz 4920 -VPMOVSXBQZrr 4921 -VPMOVSXBQZrrk 4922 -VPMOVSXBQZrrkz 4923 -VPMOVSXBQrm 4924 -VPMOVSXBQrr 4925 -VPMOVSXBWYrm 4926 -VPMOVSXBWYrr 4927 -VPMOVSXBWZ 4928 -VPMOVSXBWZrm 4929 -VPMOVSXBWZrmk 4930 -VPMOVSXBWZrmkz 4931 -VPMOVSXBWZrr 4932 -VPMOVSXBWZrrk 4933 -VPMOVSXBWZrrkz 4934 -VPMOVSXBWrm 4935 -VPMOVSXBWrr 4936 -VPMOVSXDQYrm 4937 -VPMOVSXDQYrr 4938 -VPMOVSXDQZ 4939 -VPMOVSXDQZrm 4940 -VPMOVSXDQZrmk 4941 -VPMOVSXDQZrmkz 4942 -VPMOVSXDQZrr 4943 -VPMOVSXDQZrrk 4944 -VPMOVSXDQZrrkz 4945 -VPMOVSXDQrm 4946 -VPMOVSXDQrr 4947 -VPMOVSXWDYrm 4948 -VPMOVSXWDYrr 4949 -VPMOVSXWDZ 4950 -VPMOVSXWDZrm 4951 -VPMOVSXWDZrmk 4952 -VPMOVSXWDZrmkz 4953 -VPMOVSXWDZrr 4954 -VPMOVSXWDZrrk 4955 -VPMOVSXWDZrrkz 4956 -VPMOVSXWDrm 4957 -VPMOVSXWDrr 4958 -VPMOVSXWQYrm 4959 -VPMOVSXWQYrr 4960 -VPMOVSXWQZ 4961 -VPMOVSXWQZrm 4962 -VPMOVSXWQZrmk 4963 -VPMOVSXWQZrmkz 4964 -VPMOVSXWQZrr 4965 -VPMOVSXWQZrrk 4966 -VPMOVSXWQZrrkz 4967 -VPMOVSXWQrm 4968 -VPMOVSXWQrr 4969 -VPMOVUSDBZ 4970 -VPMOVUSDBZmr 4971 -VPMOVUSDBZmrk 4972 -VPMOVUSDBZrr 4973 -VPMOVUSDBZrrk 4974 -VPMOVUSDBZrrkz 4975 -VPMOVUSDWZ 4976 -VPMOVUSDWZmr 4977 -VPMOVUSDWZmrk 4978 -VPMOVUSDWZrr 4979 
-VPMOVUSDWZrrk 4980 -VPMOVUSDWZrrkz 4981 -VPMOVUSQBZ 4982 -VPMOVUSQBZmr 4983 -VPMOVUSQBZmrk 4984 -VPMOVUSQBZrr 4985 -VPMOVUSQBZrrk 4986 -VPMOVUSQBZrrkz 4987 -VPMOVUSQDZ 4988 -VPMOVUSQDZmr 4989 -VPMOVUSQDZmrk 4990 -VPMOVUSQDZrr 4991 -VPMOVUSQDZrrk 4992 -VPMOVUSQDZrrkz 4993 -VPMOVUSQWZ 4994 -VPMOVUSQWZmr 4995 -VPMOVUSQWZmrk 4996 -VPMOVUSQWZrr 4997 -VPMOVUSQWZrrk 4998 -VPMOVUSQWZrrkz 4999 -VPMOVUSWBZ 5000 -VPMOVUSWBZmr 5001 -VPMOVUSWBZmrk 5002 -VPMOVUSWBZrr 5003 -VPMOVUSWBZrrk 5004 -VPMOVUSWBZrrkz 5005 -VPMOVW 5006 -VPMOVWBZ 5007 -VPMOVWBZmr 5008 -VPMOVWBZmrk 5009 -VPMOVWBZrr 5010 -VPMOVWBZrrk 5011 -VPMOVWBZrrkz 5012 -VPMOVZXBDYrm 5013 -VPMOVZXBDYrr 5014 -VPMOVZXBDZ 5015 -VPMOVZXBDZrm 5016 -VPMOVZXBDZrmk 5017 -VPMOVZXBDZrmkz 5018 -VPMOVZXBDZrr 5019 -VPMOVZXBDZrrk 5020 -VPMOVZXBDZrrkz 5021 -VPMOVZXBDrm 5022 -VPMOVZXBDrr 5023 -VPMOVZXBQYrm 5024 -VPMOVZXBQYrr 5025 -VPMOVZXBQZ 5026 -VPMOVZXBQZrm 5027 -VPMOVZXBQZrmk 5028 -VPMOVZXBQZrmkz 5029 -VPMOVZXBQZrr 5030 -VPMOVZXBQZrrk 5031 -VPMOVZXBQZrrkz 5032 -VPMOVZXBQrm 5033 -VPMOVZXBQrr 5034 -VPMOVZXBWYrm 5035 -VPMOVZXBWYrr 5036 -VPMOVZXBWZ 5037 -VPMOVZXBWZrm 5038 -VPMOVZXBWZrmk 5039 -VPMOVZXBWZrmkz 5040 -VPMOVZXBWZrr 5041 -VPMOVZXBWZrrk 5042 -VPMOVZXBWZrrkz 5043 -VPMOVZXBWrm 5044 -VPMOVZXBWrr 5045 -VPMOVZXDQYrm 5046 -VPMOVZXDQYrr 5047 -VPMOVZXDQZ 5048 -VPMOVZXDQZrm 5049 -VPMOVZXDQZrmk 5050 -VPMOVZXDQZrmkz 5051 -VPMOVZXDQZrr 5052 -VPMOVZXDQZrrk 5053 -VPMOVZXDQZrrkz 5054 -VPMOVZXDQrm 5055 -VPMOVZXDQrr 5056 -VPMOVZXWDYrm 5057 -VPMOVZXWDYrr 5058 -VPMOVZXWDZ 5059 -VPMOVZXWDZrm 5060 -VPMOVZXWDZrmk 5061 -VPMOVZXWDZrmkz 5062 -VPMOVZXWDZrr 5063 -VPMOVZXWDZrrk 5064 -VPMOVZXWDZrrkz 5065 -VPMOVZXWDrm 5066 -VPMOVZXWDrr 5067 -VPMOVZXWQYrm 5068 -VPMOVZXWQYrr 5069 -VPMOVZXWQZ 5070 -VPMOVZXWQZrm 5071 -VPMOVZXWQZrmk 5072 -VPMOVZXWQZrmkz 5073 -VPMOVZXWQZrr 5074 -VPMOVZXWQZrrk 5075 -VPMOVZXWQZrrkz 5076 -VPMOVZXWQrm 5077 -VPMOVZXWQrr 5078 -VPMULDQYrm 5079 -VPMULDQYrr 5080 -VPMULDQZ 5081 -VPMULDQZrm 5082 -VPMULDQZrmb 5083 -VPMULDQZrmbk 5084 -VPMULDQZrmbkz 5085 -VPMULDQZrmk 5086 -VPMULDQZrmkz 5087 -VPMULDQZrr 5088 -VPMULDQZrrk 5089 -VPMULDQZrrkz 5090 -VPMULDQrm 5091 -VPMULDQrr 5092 -VPMULHRSWYrm 5093 -VPMULHRSWYrr 5094 -VPMULHRSWZ 5095 -VPMULHRSWZrm 5096 -VPMULHRSWZrmk 5097 -VPMULHRSWZrmkz 5098 -VPMULHRSWZrr 5099 -VPMULHRSWZrrk 5100 -VPMULHRSWZrrkz 5101 -VPMULHRSWrm 5102 -VPMULHRSWrr 5103 -VPMULHUWYrm 5104 -VPMULHUWYrr 5105 -VPMULHUWZ 5106 -VPMULHUWZrm 5107 -VPMULHUWZrmk 5108 -VPMULHUWZrmkz 5109 -VPMULHUWZrr 5110 -VPMULHUWZrrk 5111 -VPMULHUWZrrkz 5112 -VPMULHUWrm 5113 -VPMULHUWrr 5114 -VPMULHWYrm 5115 -VPMULHWYrr 5116 -VPMULHWZ 5117 -VPMULHWZrm 5118 -VPMULHWZrmk 5119 -VPMULHWZrmkz 5120 -VPMULHWZrr 5121 -VPMULHWZrrk 5122 -VPMULHWZrrkz 5123 -VPMULHWrm 5124 -VPMULHWrr 5125 -VPMULLDYrm 5126 -VPMULLDYrr 5127 -VPMULLDZ 5128 -VPMULLDZrm 5129 -VPMULLDZrmb 5130 -VPMULLDZrmbk 5131 -VPMULLDZrmbkz 5132 -VPMULLDZrmk 5133 -VPMULLDZrmkz 5134 -VPMULLDZrr 5135 -VPMULLDZrrk 5136 -VPMULLDZrrkz 5137 -VPMULLDrm 5138 -VPMULLDrr 5139 -VPMULLQZ 5140 -VPMULLQZrm 5141 -VPMULLQZrmb 5142 -VPMULLQZrmbk 5143 -VPMULLQZrmbkz 5144 -VPMULLQZrmk 5145 -VPMULLQZrmkz 5146 -VPMULLQZrr 5147 -VPMULLQZrrk 5148 -VPMULLQZrrkz 5149 -VPMULLWYrm 5150 -VPMULLWYrr 5151 -VPMULLWZ 5152 -VPMULLWZrm 5153 -VPMULLWZrmk 5154 -VPMULLWZrmkz 5155 -VPMULLWZrr 5156 -VPMULLWZrrk 5157 -VPMULLWZrrkz 5158 -VPMULLWrm 5159 -VPMULLWrr 5160 -VPMULTISHIFTQBZ 5161 -VPMULTISHIFTQBZrm 5162 -VPMULTISHIFTQBZrmb 5163 -VPMULTISHIFTQBZrmbk 5164 -VPMULTISHIFTQBZrmbkz 5165 -VPMULTISHIFTQBZrmk 5166 -VPMULTISHIFTQBZrmkz 5167 -VPMULTISHIFTQBZrr 5168 
-VPMULTISHIFTQBZrrk 5169 -VPMULTISHIFTQBZrrkz 5170 -VPMULUDQYrm 5171 -VPMULUDQYrr 5172 -VPMULUDQZ 5173 -VPMULUDQZrm 5174 -VPMULUDQZrmb 5175 -VPMULUDQZrmbk 5176 -VPMULUDQZrmbkz 5177 -VPMULUDQZrmk 5178 -VPMULUDQZrmkz 5179 -VPMULUDQZrr 5180 -VPMULUDQZrrk 5181 -VPMULUDQZrrkz 5182 -VPMULUDQrm 5183 -VPMULUDQrr 5184 -VPOPCNTBZ 5185 -VPOPCNTBZrm 5186 -VPOPCNTBZrmk 5187 -VPOPCNTBZrmkz 5188 -VPOPCNTBZrr 5189 -VPOPCNTBZrrk 5190 -VPOPCNTBZrrkz 5191 -VPOPCNTDZ 5192 -VPOPCNTDZrm 5193 -VPOPCNTDZrmb 5194 -VPOPCNTDZrmbk 5195 -VPOPCNTDZrmbkz 5196 -VPOPCNTDZrmk 5197 -VPOPCNTDZrmkz 5198 -VPOPCNTDZrr 5199 -VPOPCNTDZrrk 5200 -VPOPCNTDZrrkz 5201 -VPOPCNTQZ 5202 -VPOPCNTQZrm 5203 -VPOPCNTQZrmb 5204 -VPOPCNTQZrmbk 5205 -VPOPCNTQZrmbkz 5206 -VPOPCNTQZrmk 5207 -VPOPCNTQZrmkz 5208 -VPOPCNTQZrr 5209 -VPOPCNTQZrrk 5210 -VPOPCNTQZrrkz 5211 -VPOPCNTWZ 5212 -VPOPCNTWZrm 5213 -VPOPCNTWZrmk 5214 -VPOPCNTWZrmkz 5215 -VPOPCNTWZrr 5216 -VPOPCNTWZrrk 5217 -VPOPCNTWZrrkz 5218 -VPORDZ 5219 -VPORDZrm 5220 -VPORDZrmb 5221 -VPORDZrmbk 5222 -VPORDZrmbkz 5223 -VPORDZrmk 5224 -VPORDZrmkz 5225 -VPORDZrr 5226 -VPORDZrrk 5227 -VPORDZrrkz 5228 -VPORQZ 5229 -VPORQZrm 5230 -VPORQZrmb 5231 -VPORQZrmbk 5232 -VPORQZrmbkz 5233 -VPORQZrmk 5234 -VPORQZrmkz 5235 -VPORQZrr 5236 -VPORQZrrk 5237 -VPORQZrrkz 5238 -VPORYrm 5239 -VPORYrr 5240 -VPORrm 5241 -VPORrr 5242 -VPPERMrmr 5243 -VPPERMrrm 5244 -VPPERMrrr 5245 -VPPERMrrr_REV 5246 -VPROLDZ 5247 -VPROLDZmbi 5248 -VPROLDZmbik 5249 -VPROLDZmbikz 5250 -VPROLDZmi 5251 -VPROLDZmik 5252 -VPROLDZmikz 5253 -VPROLDZri 5254 -VPROLDZrik 5255 -VPROLDZrikz 5256 -VPROLQZ 5257 -VPROLQZmbi 5258 -VPROLQZmbik 5259 -VPROLQZmbikz 5260 -VPROLQZmi 5261 -VPROLQZmik 5262 -VPROLQZmikz 5263 -VPROLQZri 5264 -VPROLQZrik 5265 -VPROLQZrikz 5266 -VPROLVDZ 5267 -VPROLVDZrm 5268 -VPROLVDZrmb 5269 -VPROLVDZrmbk 5270 -VPROLVDZrmbkz 5271 -VPROLVDZrmk 5272 -VPROLVDZrmkz 5273 -VPROLVDZrr 5274 -VPROLVDZrrk 5275 -VPROLVDZrrkz 5276 -VPROLVQZ 5277 -VPROLVQZrm 5278 -VPROLVQZrmb 5279 -VPROLVQZrmbk 5280 -VPROLVQZrmbkz 5281 -VPROLVQZrmk 5282 -VPROLVQZrmkz 5283 -VPROLVQZrr 5284 -VPROLVQZrrk 5285 -VPROLVQZrrkz 5286 -VPRORDZ 5287 -VPRORDZmbi 5288 -VPRORDZmbik 5289 -VPRORDZmbikz 5290 -VPRORDZmi 5291 -VPRORDZmik 5292 -VPRORDZmikz 5293 -VPRORDZri 5294 -VPRORDZrik 5295 -VPRORDZrikz 5296 -VPRORQZ 5297 -VPRORQZmbi 5298 -VPRORQZmbik 5299 -VPRORQZmbikz 5300 -VPRORQZmi 5301 -VPRORQZmik 5302 -VPRORQZmikz 5303 -VPRORQZri 5304 -VPRORQZrik 5305 -VPRORQZrikz 5306 -VPRORVDZ 5307 -VPRORVDZrm 5308 -VPRORVDZrmb 5309 -VPRORVDZrmbk 5310 -VPRORVDZrmbkz 5311 -VPRORVDZrmk 5312 -VPRORVDZrmkz 5313 -VPRORVDZrr 5314 -VPRORVDZrrk 5315 -VPRORVDZrrkz 5316 -VPRORVQZ 5317 -VPRORVQZrm 5318 -VPRORVQZrmb 5319 -VPRORVQZrmbk 5320 -VPRORVQZrmbkz 5321 -VPRORVQZrmk 5322 -VPRORVQZrmkz 5323 -VPRORVQZrr 5324 -VPRORVQZrrk 5325 -VPRORVQZrrkz 5326 -VPROTBmi 5327 -VPROTBmr 5328 -VPROTBri 5329 -VPROTBrm 5330 -VPROTBrr 5331 -VPROTBrr_REV 5332 -VPROTDmi 5333 -VPROTDmr 5334 -VPROTDri 5335 -VPROTDrm 5336 -VPROTDrr 5337 -VPROTDrr_REV 5338 -VPROTQmi 5339 -VPROTQmr 5340 -VPROTQri 5341 -VPROTQrm 5342 -VPROTQrr 5343 -VPROTQrr_REV 5344 -VPROTWmi 5345 -VPROTWmr 5346 -VPROTWri 5347 -VPROTWrm 5348 -VPROTWrr 5349 -VPROTWrr_REV 5350 -VPSADBWYrm 5351 -VPSADBWYrr 5352 -VPSADBWZ 5353 -VPSADBWZrm 5354 -VPSADBWZrr 5355 -VPSADBWrm 5356 -VPSADBWrr 5357 -VPSCATTERDDZ 5358 -VPSCATTERDDZmr 5359 -VPSCATTERDQZ 5360 -VPSCATTERDQZmr 5361 -VPSCATTERQDZ 5362 -VPSCATTERQDZmr 5363 -VPSCATTERQQZ 5364 -VPSCATTERQQZmr 5365 -VPSHABmr 5366 -VPSHABrm 5367 -VPSHABrr 5368 -VPSHABrr_REV 5369 -VPSHADmr 5370 -VPSHADrm 5371 -VPSHADrr 5372 
-VPSHADrr_REV 5373 -VPSHAQmr 5374 -VPSHAQrm 5375 -VPSHAQrr 5376 -VPSHAQrr_REV 5377 -VPSHAWmr 5378 -VPSHAWrm 5379 -VPSHAWrr 5380 -VPSHAWrr_REV 5381 -VPSHLBmr 5382 -VPSHLBrm 5383 -VPSHLBrr 5384 -VPSHLBrr_REV 5385 -VPSHLDDZ 5386 -VPSHLDDZrmbi 5387 -VPSHLDDZrmbik 5388 -VPSHLDDZrmbikz 5389 -VPSHLDDZrmi 5390 -VPSHLDDZrmik 5391 -VPSHLDDZrmikz 5392 -VPSHLDDZrri 5393 -VPSHLDDZrrik 5394 -VPSHLDDZrrikz 5395 -VPSHLDQZ 5396 -VPSHLDQZrmbi 5397 -VPSHLDQZrmbik 5398 -VPSHLDQZrmbikz 5399 -VPSHLDQZrmi 5400 -VPSHLDQZrmik 5401 -VPSHLDQZrmikz 5402 -VPSHLDQZrri 5403 -VPSHLDQZrrik 5404 -VPSHLDQZrrikz 5405 -VPSHLDVDZ 5406 -VPSHLDVDZm 5407 -VPSHLDVDZmb 5408 -VPSHLDVDZmbk 5409 -VPSHLDVDZmbkz 5410 -VPSHLDVDZmk 5411 -VPSHLDVDZmkz 5412 -VPSHLDVDZr 5413 -VPSHLDVDZrk 5414 -VPSHLDVDZrkz 5415 -VPSHLDVQZ 5416 -VPSHLDVQZm 5417 -VPSHLDVQZmb 5418 -VPSHLDVQZmbk 5419 -VPSHLDVQZmbkz 5420 -VPSHLDVQZmk 5421 -VPSHLDVQZmkz 5422 -VPSHLDVQZr 5423 -VPSHLDVQZrk 5424 -VPSHLDVQZrkz 5425 -VPSHLDVWZ 5426 -VPSHLDVWZm 5427 -VPSHLDVWZmk 5428 -VPSHLDVWZmkz 5429 -VPSHLDVWZr 5430 -VPSHLDVWZrk 5431 -VPSHLDVWZrkz 5432 -VPSHLDWZ 5433 -VPSHLDWZrmi 5434 -VPSHLDWZrmik 5435 -VPSHLDWZrmikz 5436 -VPSHLDWZrri 5437 -VPSHLDWZrrik 5438 -VPSHLDWZrrikz 5439 -VPSHLDmr 5440 -VPSHLDrm 5441 -VPSHLDrr 5442 -VPSHLDrr_REV 5443 -VPSHLQmr 5444 -VPSHLQrm 5445 -VPSHLQrr 5446 -VPSHLQrr_REV 5447 -VPSHLWmr 5448 -VPSHLWrm 5449 -VPSHLWrr 5450 -VPSHLWrr_REV 5451 -VPSHRDDZ 5452 -VPSHRDDZrmbi 5453 -VPSHRDDZrmbik 5454 -VPSHRDDZrmbikz 5455 -VPSHRDDZrmi 5456 -VPSHRDDZrmik 5457 -VPSHRDDZrmikz 5458 -VPSHRDDZrri 5459 -VPSHRDDZrrik 5460 -VPSHRDDZrrikz 5461 -VPSHRDQZ 5462 -VPSHRDQZrmbi 5463 -VPSHRDQZrmbik 5464 -VPSHRDQZrmbikz 5465 -VPSHRDQZrmi 5466 -VPSHRDQZrmik 5467 -VPSHRDQZrmikz 5468 -VPSHRDQZrri 5469 -VPSHRDQZrrik 5470 -VPSHRDQZrrikz 5471 -VPSHRDVDZ 5472 -VPSHRDVDZm 5473 -VPSHRDVDZmb 5474 -VPSHRDVDZmbk 5475 -VPSHRDVDZmbkz 5476 -VPSHRDVDZmk 5477 -VPSHRDVDZmkz 5478 -VPSHRDVDZr 5479 -VPSHRDVDZrk 5480 -VPSHRDVDZrkz 5481 -VPSHRDVQZ 5482 -VPSHRDVQZm 5483 -VPSHRDVQZmb 5484 -VPSHRDVQZmbk 5485 -VPSHRDVQZmbkz 5486 -VPSHRDVQZmk 5487 -VPSHRDVQZmkz 5488 -VPSHRDVQZr 5489 -VPSHRDVQZrk 5490 -VPSHRDVQZrkz 5491 -VPSHRDVWZ 5492 -VPSHRDVWZm 5493 -VPSHRDVWZmk 5494 -VPSHRDVWZmkz 5495 -VPSHRDVWZr 5496 -VPSHRDVWZrk 5497 -VPSHRDVWZrkz 5498 -VPSHRDWZ 5499 -VPSHRDWZrmi 5500 -VPSHRDWZrmik 5501 -VPSHRDWZrmikz 5502 -VPSHRDWZrri 5503 -VPSHRDWZrrik 5504 -VPSHRDWZrrikz 5505 -VPSHUFBITQMBZ 5506 -VPSHUFBITQMBZrm 5507 -VPSHUFBITQMBZrmk 5508 -VPSHUFBITQMBZrr 5509 -VPSHUFBITQMBZrrk 5510 -VPSHUFBYrm 5511 -VPSHUFBYrr 5512 -VPSHUFBZ 5513 -VPSHUFBZrm 5514 -VPSHUFBZrmk 5515 -VPSHUFBZrmkz 5516 -VPSHUFBZrr 5517 -VPSHUFBZrrk 5518 -VPSHUFBZrrkz 5519 -VPSHUFBrm 5520 -VPSHUFBrr 5521 -VPSHUFDYmi 5522 -VPSHUFDYri 5523 -VPSHUFDZ 5524 -VPSHUFDZmbi 5525 -VPSHUFDZmbik 5526 -VPSHUFDZmbikz 5527 -VPSHUFDZmi 5528 -VPSHUFDZmik 5529 -VPSHUFDZmikz 5530 -VPSHUFDZri 5531 -VPSHUFDZrik 5532 -VPSHUFDZrikz 5533 -VPSHUFDmi 5534 -VPSHUFDri 5535 -VPSHUFHWYmi 5536 -VPSHUFHWYri 5537 -VPSHUFHWZ 5538 -VPSHUFHWZmi 5539 -VPSHUFHWZmik 5540 -VPSHUFHWZmikz 5541 -VPSHUFHWZri 5542 -VPSHUFHWZrik 5543 -VPSHUFHWZrikz 5544 -VPSHUFHWmi 5545 -VPSHUFHWri 5546 -VPSHUFLWYmi 5547 -VPSHUFLWYri 5548 -VPSHUFLWZ 5549 -VPSHUFLWZmi 5550 -VPSHUFLWZmik 5551 -VPSHUFLWZmikz 5552 -VPSHUFLWZri 5553 -VPSHUFLWZrik 5554 -VPSHUFLWZrikz 5555 -VPSHUFLWmi 5556 -VPSHUFLWri 5557 -VPSIGNBYrm 5558 -VPSIGNBYrr 5559 -VPSIGNBrm 5560 -VPSIGNBrr 5561 -VPSIGNDYrm 5562 -VPSIGNDYrr 5563 -VPSIGNDrm 5564 -VPSIGNDrr 5565 -VPSIGNWYrm 5566 -VPSIGNWYrr 5567 -VPSIGNWrm 5568 -VPSIGNWrr 5569 -VPSLLDQYri 5570 
-VPSLLDQZ 5571 -VPSLLDQZmi 5572 -VPSLLDQZri 5573 -VPSLLDQri 5574 -VPSLLDYri 5575 -VPSLLDYrm 5576 -VPSLLDYrr 5577 -VPSLLDZ 5578 -VPSLLDZmbi 5579 -VPSLLDZmbik 5580 -VPSLLDZmbikz 5581 -VPSLLDZmi 5582 -VPSLLDZmik 5583 -VPSLLDZmikz 5584 -VPSLLDZri 5585 -VPSLLDZrik 5586 -VPSLLDZrikz 5587 -VPSLLDZrm 5588 -VPSLLDZrmk 5589 -VPSLLDZrmkz 5590 -VPSLLDZrr 5591 -VPSLLDZrrk 5592 -VPSLLDZrrkz 5593 -VPSLLDri 5594 -VPSLLDrm 5595 -VPSLLDrr 5596 -VPSLLQYri 5597 -VPSLLQYrm 5598 -VPSLLQYrr 5599 -VPSLLQZ 5600 -VPSLLQZmbi 5601 -VPSLLQZmbik 5602 -VPSLLQZmbikz 5603 -VPSLLQZmi 5604 -VPSLLQZmik 5605 -VPSLLQZmikz 5606 -VPSLLQZri 5607 -VPSLLQZrik 5608 -VPSLLQZrikz 5609 -VPSLLQZrm 5610 -VPSLLQZrmk 5611 -VPSLLQZrmkz 5612 -VPSLLQZrr 5613 -VPSLLQZrrk 5614 -VPSLLQZrrkz 5615 -VPSLLQri 5616 -VPSLLQrm 5617 -VPSLLQrr 5618 -VPSLLVDYrm 5619 -VPSLLVDYrr 5620 -VPSLLVDZ 5621 -VPSLLVDZrm 5622 -VPSLLVDZrmb 5623 -VPSLLVDZrmbk 5624 -VPSLLVDZrmbkz 5625 -VPSLLVDZrmk 5626 -VPSLLVDZrmkz 5627 -VPSLLVDZrr 5628 -VPSLLVDZrrk 5629 -VPSLLVDZrrkz 5630 -VPSLLVDrm 5631 -VPSLLVDrr 5632 -VPSLLVQYrm 5633 -VPSLLVQYrr 5634 -VPSLLVQZ 5635 -VPSLLVQZrm 5636 -VPSLLVQZrmb 5637 -VPSLLVQZrmbk 5638 -VPSLLVQZrmbkz 5639 -VPSLLVQZrmk 5640 -VPSLLVQZrmkz 5641 -VPSLLVQZrr 5642 -VPSLLVQZrrk 5643 -VPSLLVQZrrkz 5644 -VPSLLVQrm 5645 -VPSLLVQrr 5646 -VPSLLVWZ 5647 -VPSLLVWZrm 5648 -VPSLLVWZrmk 5649 -VPSLLVWZrmkz 5650 -VPSLLVWZrr 5651 -VPSLLVWZrrk 5652 -VPSLLVWZrrkz 5653 -VPSLLWYri 5654 -VPSLLWYrm 5655 -VPSLLWYrr 5656 -VPSLLWZ 5657 -VPSLLWZmi 5658 -VPSLLWZmik 5659 -VPSLLWZmikz 5660 -VPSLLWZri 5661 -VPSLLWZrik 5662 -VPSLLWZrikz 5663 -VPSLLWZrm 5664 -VPSLLWZrmk 5665 -VPSLLWZrmkz 5666 -VPSLLWZrr 5667 -VPSLLWZrrk 5668 -VPSLLWZrrkz 5669 -VPSLLWri 5670 -VPSLLWrm 5671 -VPSLLWrr 5672 -VPSRADYri 5673 -VPSRADYrm 5674 -VPSRADYrr 5675 -VPSRADZ 5676 -VPSRADZmbi 5677 -VPSRADZmbik 5678 -VPSRADZmbikz 5679 -VPSRADZmi 5680 -VPSRADZmik 5681 -VPSRADZmikz 5682 -VPSRADZri 5683 -VPSRADZrik 5684 -VPSRADZrikz 5685 -VPSRADZrm 5686 -VPSRADZrmk 5687 -VPSRADZrmkz 5688 -VPSRADZrr 5689 -VPSRADZrrk 5690 -VPSRADZrrkz 5691 -VPSRADri 5692 -VPSRADrm 5693 -VPSRADrr 5694 -VPSRAQZ 5695 -VPSRAQZmbi 5696 -VPSRAQZmbik 5697 -VPSRAQZmbikz 5698 -VPSRAQZmi 5699 -VPSRAQZmik 5700 -VPSRAQZmikz 5701 -VPSRAQZri 5702 -VPSRAQZrik 5703 -VPSRAQZrikz 5704 -VPSRAQZrm 5705 -VPSRAQZrmk 5706 -VPSRAQZrmkz 5707 -VPSRAQZrr 5708 -VPSRAQZrrk 5709 -VPSRAQZrrkz 5710 -VPSRAVDYrm 5711 -VPSRAVDYrr 5712 -VPSRAVDZ 5713 -VPSRAVDZrm 5714 -VPSRAVDZrmb 5715 -VPSRAVDZrmbk 5716 -VPSRAVDZrmbkz 5717 -VPSRAVDZrmk 5718 -VPSRAVDZrmkz 5719 -VPSRAVDZrr 5720 -VPSRAVDZrrk 5721 -VPSRAVDZrrkz 5722 -VPSRAVDrm 5723 -VPSRAVDrr 5724 -VPSRAVQZ 5725 -VPSRAVQZrm 5726 -VPSRAVQZrmb 5727 -VPSRAVQZrmbk 5728 -VPSRAVQZrmbkz 5729 -VPSRAVQZrmk 5730 -VPSRAVQZrmkz 5731 -VPSRAVQZrr 5732 -VPSRAVQZrrk 5733 -VPSRAVQZrrkz 5734 -VPSRAVWZ 5735 -VPSRAVWZrm 5736 -VPSRAVWZrmk 5737 -VPSRAVWZrmkz 5738 -VPSRAVWZrr 5739 -VPSRAVWZrrk 5740 -VPSRAVWZrrkz 5741 -VPSRAWYri 5742 -VPSRAWYrm 5743 -VPSRAWYrr 5744 -VPSRAWZ 5745 -VPSRAWZmi 5746 -VPSRAWZmik 5747 -VPSRAWZmikz 5748 -VPSRAWZri 5749 -VPSRAWZrik 5750 -VPSRAWZrikz 5751 -VPSRAWZrm 5752 -VPSRAWZrmk 5753 -VPSRAWZrmkz 5754 -VPSRAWZrr 5755 -VPSRAWZrrk 5756 -VPSRAWZrrkz 5757 -VPSRAWri 5758 -VPSRAWrm 5759 -VPSRAWrr 5760 -VPSRLDQYri 5761 -VPSRLDQZ 5762 -VPSRLDQZmi 5763 -VPSRLDQZri 5764 -VPSRLDQri 5765 -VPSRLDYri 5766 -VPSRLDYrm 5767 -VPSRLDYrr 5768 -VPSRLDZ 5769 -VPSRLDZmbi 5770 -VPSRLDZmbik 5771 -VPSRLDZmbikz 5772 -VPSRLDZmi 5773 -VPSRLDZmik 5774 -VPSRLDZmikz 5775 -VPSRLDZri 5776 -VPSRLDZrik 5777 -VPSRLDZrikz 5778 -VPSRLDZrm 5779 -VPSRLDZrmk 5780 
-VPSRLDZrmkz 5781 -VPSRLDZrr 5782 -VPSRLDZrrk 5783 -VPSRLDZrrkz 5784 -VPSRLDri 5785 -VPSRLDrm 5786 -VPSRLDrr 5787 -VPSRLQYri 5788 -VPSRLQYrm 5789 -VPSRLQYrr 5790 -VPSRLQZ 5791 -VPSRLQZmbi 5792 -VPSRLQZmbik 5793 -VPSRLQZmbikz 5794 -VPSRLQZmi 5795 -VPSRLQZmik 5796 -VPSRLQZmikz 5797 -VPSRLQZri 5798 -VPSRLQZrik 5799 -VPSRLQZrikz 5800 -VPSRLQZrm 5801 -VPSRLQZrmk 5802 -VPSRLQZrmkz 5803 -VPSRLQZrr 5804 -VPSRLQZrrk 5805 -VPSRLQZrrkz 5806 -VPSRLQri 5807 -VPSRLQrm 5808 -VPSRLQrr 5809 -VPSRLVDYrm 5810 -VPSRLVDYrr 5811 -VPSRLVDZ 5812 -VPSRLVDZrm 5813 -VPSRLVDZrmb 5814 -VPSRLVDZrmbk 5815 -VPSRLVDZrmbkz 5816 -VPSRLVDZrmk 5817 -VPSRLVDZrmkz 5818 -VPSRLVDZrr 5819 -VPSRLVDZrrk 5820 -VPSRLVDZrrkz 5821 -VPSRLVDrm 5822 -VPSRLVDrr 5823 -VPSRLVQYrm 5824 -VPSRLVQYrr 5825 -VPSRLVQZ 5826 -VPSRLVQZrm 5827 -VPSRLVQZrmb 5828 -VPSRLVQZrmbk 5829 -VPSRLVQZrmbkz 5830 -VPSRLVQZrmk 5831 -VPSRLVQZrmkz 5832 -VPSRLVQZrr 5833 -VPSRLVQZrrk 5834 -VPSRLVQZrrkz 5835 -VPSRLVQrm 5836 -VPSRLVQrr 5837 -VPSRLVWZ 5838 -VPSRLVWZrm 5839 -VPSRLVWZrmk 5840 -VPSRLVWZrmkz 5841 -VPSRLVWZrr 5842 -VPSRLVWZrrk 5843 -VPSRLVWZrrkz 5844 -VPSRLWYri 5845 -VPSRLWYrm 5846 -VPSRLWYrr 5847 -VPSRLWZ 5848 -VPSRLWZmi 5849 -VPSRLWZmik 5850 -VPSRLWZmikz 5851 -VPSRLWZri 5852 -VPSRLWZrik 5853 -VPSRLWZrikz 5854 -VPSRLWZrm 5855 -VPSRLWZrmk 5856 -VPSRLWZrmkz 5857 -VPSRLWZrr 5858 -VPSRLWZrrk 5859 -VPSRLWZrrkz 5860 -VPSRLWri 5861 -VPSRLWrm 5862 -VPSRLWrr 5863 -VPSUBBYrm 5864 -VPSUBBYrr 5865 -VPSUBBZ 5866 -VPSUBBZrm 5867 -VPSUBBZrmk 5868 -VPSUBBZrmkz 5869 -VPSUBBZrr 5870 -VPSUBBZrrk 5871 -VPSUBBZrrkz 5872 -VPSUBBrm 5873 -VPSUBBrr 5874 -VPSUBDYrm 5875 -VPSUBDYrr 5876 -VPSUBDZ 5877 -VPSUBDZrm 5878 -VPSUBDZrmb 5879 -VPSUBDZrmbk 5880 -VPSUBDZrmbkz 5881 -VPSUBDZrmk 5882 -VPSUBDZrmkz 5883 -VPSUBDZrr 5884 -VPSUBDZrrk 5885 -VPSUBDZrrkz 5886 -VPSUBDrm 5887 -VPSUBDrr 5888 -VPSUBQYrm 5889 -VPSUBQYrr 5890 -VPSUBQZ 5891 -VPSUBQZrm 5892 -VPSUBQZrmb 5893 -VPSUBQZrmbk 5894 -VPSUBQZrmbkz 5895 -VPSUBQZrmk 5896 -VPSUBQZrmkz 5897 -VPSUBQZrr 5898 -VPSUBQZrrk 5899 -VPSUBQZrrkz 5900 -VPSUBQrm 5901 -VPSUBQrr 5902 -VPSUBSBYrm 5903 -VPSUBSBYrr 5904 -VPSUBSBZ 5905 -VPSUBSBZrm 5906 -VPSUBSBZrmk 5907 -VPSUBSBZrmkz 5908 -VPSUBSBZrr 5909 -VPSUBSBZrrk 5910 -VPSUBSBZrrkz 5911 -VPSUBSBrm 5912 -VPSUBSBrr 5913 -VPSUBSWYrm 5914 -VPSUBSWYrr 5915 -VPSUBSWZ 5916 -VPSUBSWZrm 5917 -VPSUBSWZrmk 5918 -VPSUBSWZrmkz 5919 -VPSUBSWZrr 5920 -VPSUBSWZrrk 5921 -VPSUBSWZrrkz 5922 -VPSUBSWrm 5923 -VPSUBSWrr 5924 -VPSUBUSBYrm 5925 -VPSUBUSBYrr 5926 -VPSUBUSBZ 5927 -VPSUBUSBZrm 5928 -VPSUBUSBZrmk 5929 -VPSUBUSBZrmkz 5930 -VPSUBUSBZrr 5931 -VPSUBUSBZrrk 5932 -VPSUBUSBZrrkz 5933 -VPSUBUSBrm 5934 -VPSUBUSBrr 5935 -VPSUBUSWYrm 5936 -VPSUBUSWYrr 5937 -VPSUBUSWZ 5938 -VPSUBUSWZrm 5939 -VPSUBUSWZrmk 5940 -VPSUBUSWZrmkz 5941 -VPSUBUSWZrr 5942 -VPSUBUSWZrrk 5943 -VPSUBUSWZrrkz 5944 -VPSUBUSWrm 5945 -VPSUBUSWrr 5946 -VPSUBWYrm 5947 -VPSUBWYrr 5948 -VPSUBWZ 5949 -VPSUBWZrm 5950 -VPSUBWZrmk 5951 -VPSUBWZrmkz 5952 -VPSUBWZrr 5953 -VPSUBWZrrk 5954 -VPSUBWZrrkz 5955 -VPSUBWrm 5956 -VPSUBWrr 5957 -VPTERNLOGDZ 5958 -VPTERNLOGDZrmbi 5959 -VPTERNLOGDZrmbik 5960 -VPTERNLOGDZrmbikz 5961 -VPTERNLOGDZrmi 5962 -VPTERNLOGDZrmik 5963 -VPTERNLOGDZrmikz 5964 -VPTERNLOGDZrri 5965 -VPTERNLOGDZrrik 5966 -VPTERNLOGDZrrikz 5967 -VPTERNLOGQZ 5968 -VPTERNLOGQZrmbi 5969 -VPTERNLOGQZrmbik 5970 -VPTERNLOGQZrmbikz 5971 -VPTERNLOGQZrmi 5972 -VPTERNLOGQZrmik 5973 -VPTERNLOGQZrmikz 5974 -VPTERNLOGQZrri 5975 -VPTERNLOGQZrrik 5976 -VPTERNLOGQZrrikz 5977 -VPTESTMBZ 5978 -VPTESTMBZrm 5979 -VPTESTMBZrmk 5980 -VPTESTMBZrr 5981 -VPTESTMBZrrk 5982 -VPTESTMDZ 5983 
-VPTESTMDZrm 5984 -VPTESTMDZrmb 5985 -VPTESTMDZrmbk 5986 -VPTESTMDZrmk 5987 -VPTESTMDZrr 5988 -VPTESTMDZrrk 5989 -VPTESTMQZ 5990 -VPTESTMQZrm 5991 -VPTESTMQZrmb 5992 -VPTESTMQZrmbk 5993 -VPTESTMQZrmk 5994 -VPTESTMQZrr 5995 -VPTESTMQZrrk 5996 -VPTESTMWZ 5997 -VPTESTMWZrm 5998 -VPTESTMWZrmk 5999 -VPTESTMWZrr 6000 -VPTESTMWZrrk 6001 -VPTESTNMBZ 6002 -VPTESTNMBZrm 6003 -VPTESTNMBZrmk 6004 -VPTESTNMBZrr 6005 -VPTESTNMBZrrk 6006 -VPTESTNMDZ 6007 -VPTESTNMDZrm 6008 -VPTESTNMDZrmb 6009 -VPTESTNMDZrmbk 6010 -VPTESTNMDZrmk 6011 -VPTESTNMDZrr 6012 -VPTESTNMDZrrk 6013 -VPTESTNMQZ 6014 -VPTESTNMQZrm 6015 -VPTESTNMQZrmb 6016 -VPTESTNMQZrmbk 6017 -VPTESTNMQZrmk 6018 -VPTESTNMQZrr 6019 -VPTESTNMQZrrk 6020 -VPTESTNMWZ 6021 -VPTESTNMWZrm 6022 -VPTESTNMWZrmk 6023 -VPTESTNMWZrr 6024 -VPTESTNMWZrrk 6025 -VPTESTYrm 6026 -VPTESTYrr 6027 -VPTESTrm 6028 -VPTESTrr 6029 -VPUNPCKHBWYrm 6030 -VPUNPCKHBWYrr 6031 -VPUNPCKHBWZ 6032 -VPUNPCKHBWZrm 6033 -VPUNPCKHBWZrmk 6034 -VPUNPCKHBWZrmkz 6035 -VPUNPCKHBWZrr 6036 -VPUNPCKHBWZrrk 6037 -VPUNPCKHBWZrrkz 6038 -VPUNPCKHBWrm 6039 -VPUNPCKHBWrr 6040 -VPUNPCKHDQYrm 6041 -VPUNPCKHDQYrr 6042 -VPUNPCKHDQZ 6043 -VPUNPCKHDQZrm 6044 -VPUNPCKHDQZrmb 6045 -VPUNPCKHDQZrmbk 6046 -VPUNPCKHDQZrmbkz 6047 -VPUNPCKHDQZrmk 6048 -VPUNPCKHDQZrmkz 6049 -VPUNPCKHDQZrr 6050 -VPUNPCKHDQZrrk 6051 -VPUNPCKHDQZrrkz 6052 -VPUNPCKHDQrm 6053 -VPUNPCKHDQrr 6054 -VPUNPCKHQDQYrm 6055 -VPUNPCKHQDQYrr 6056 -VPUNPCKHQDQZ 6057 -VPUNPCKHQDQZrm 6058 -VPUNPCKHQDQZrmb 6059 -VPUNPCKHQDQZrmbk 6060 -VPUNPCKHQDQZrmbkz 6061 -VPUNPCKHQDQZrmk 6062 -VPUNPCKHQDQZrmkz 6063 -VPUNPCKHQDQZrr 6064 -VPUNPCKHQDQZrrk 6065 -VPUNPCKHQDQZrrkz 6066 -VPUNPCKHQDQrm 6067 -VPUNPCKHQDQrr 6068 -VPUNPCKHWDYrm 6069 -VPUNPCKHWDYrr 6070 -VPUNPCKHWDZ 6071 -VPUNPCKHWDZrm 6072 -VPUNPCKHWDZrmk 6073 -VPUNPCKHWDZrmkz 6074 -VPUNPCKHWDZrr 6075 -VPUNPCKHWDZrrk 6076 -VPUNPCKHWDZrrkz 6077 -VPUNPCKHWDrm 6078 -VPUNPCKHWDrr 6079 -VPUNPCKLBWYrm 6080 -VPUNPCKLBWYrr 6081 -VPUNPCKLBWZ 6082 -VPUNPCKLBWZrm 6083 -VPUNPCKLBWZrmk 6084 -VPUNPCKLBWZrmkz 6085 -VPUNPCKLBWZrr 6086 -VPUNPCKLBWZrrk 6087 -VPUNPCKLBWZrrkz 6088 -VPUNPCKLBWrm 6089 -VPUNPCKLBWrr 6090 -VPUNPCKLDQYrm 6091 -VPUNPCKLDQYrr 6092 -VPUNPCKLDQZ 6093 -VPUNPCKLDQZrm 6094 -VPUNPCKLDQZrmb 6095 -VPUNPCKLDQZrmbk 6096 -VPUNPCKLDQZrmbkz 6097 -VPUNPCKLDQZrmk 6098 -VPUNPCKLDQZrmkz 6099 -VPUNPCKLDQZrr 6100 -VPUNPCKLDQZrrk 6101 -VPUNPCKLDQZrrkz 6102 -VPUNPCKLDQrm 6103 -VPUNPCKLDQrr 6104 -VPUNPCKLQDQYrm 6105 -VPUNPCKLQDQYrr 6106 -VPUNPCKLQDQZ 6107 -VPUNPCKLQDQZrm 6108 -VPUNPCKLQDQZrmb 6109 -VPUNPCKLQDQZrmbk 6110 -VPUNPCKLQDQZrmbkz 6111 -VPUNPCKLQDQZrmk 6112 -VPUNPCKLQDQZrmkz 6113 -VPUNPCKLQDQZrr 6114 -VPUNPCKLQDQZrrk 6115 -VPUNPCKLQDQZrrkz 6116 -VPUNPCKLQDQrm 6117 -VPUNPCKLQDQrr 6118 -VPUNPCKLWDYrm 6119 -VPUNPCKLWDYrr 6120 -VPUNPCKLWDZ 6121 -VPUNPCKLWDZrm 6122 -VPUNPCKLWDZrmk 6123 -VPUNPCKLWDZrmkz 6124 -VPUNPCKLWDZrr 6125 -VPUNPCKLWDZrrk 6126 -VPUNPCKLWDZrrkz 6127 -VPUNPCKLWDrm 6128 -VPUNPCKLWDrr 6129 -VPXORDZ 6130 -VPXORDZrm 6131 -VPXORDZrmb 6132 -VPXORDZrmbk 6133 -VPXORDZrmbkz 6134 -VPXORDZrmk 6135 -VPXORDZrmkz 6136 -VPXORDZrr 6137 -VPXORDZrrk 6138 -VPXORDZrrkz 6139 -VPXORQZ 6140 -VPXORQZrm 6141 -VPXORQZrmb 6142 -VPXORQZrmbk 6143 -VPXORQZrmbkz 6144 -VPXORQZrmk 6145 -VPXORQZrmkz 6146 -VPXORQZrr 6147 -VPXORQZrrk 6148 -VPXORQZrrkz 6149 -VPXORYrm 6150 -VPXORYrr 6151 -VPXORrm 6152 -VPXORrr 6153 -VRANGEPDZ 6154 -VRANGEPDZrmbi 6155 -VRANGEPDZrmbik 6156 -VRANGEPDZrmbikz 6157 -VRANGEPDZrmi 6158 -VRANGEPDZrmik 6159 -VRANGEPDZrmikz 6160 -VRANGEPDZrri 6161 -VRANGEPDZrrib 6162 -VRANGEPDZrribk 6163 -VRANGEPDZrribkz 6164 
-VRANGEPDZrrik 6165 -VRANGEPDZrrikz 6166 -VRANGEPSZ 6167 -VRANGEPSZrmbi 6168 -VRANGEPSZrmbik 6169 -VRANGEPSZrmbikz 6170 -VRANGEPSZrmi 6171 -VRANGEPSZrmik 6172 -VRANGEPSZrmikz 6173 -VRANGEPSZrri 6174 -VRANGEPSZrrib 6175 -VRANGEPSZrribk 6176 -VRANGEPSZrribkz 6177 -VRANGEPSZrrik 6178 -VRANGEPSZrrikz 6179 -VRANGESDZrmi 6180 -VRANGESDZrmik 6181 -VRANGESDZrmikz 6182 -VRANGESDZrri 6183 -VRANGESDZrrib 6184 -VRANGESDZrribk 6185 -VRANGESDZrribkz 6186 -VRANGESDZrrik 6187 -VRANGESDZrrikz 6188 -VRANGESSZrmi 6189 -VRANGESSZrmik 6190 -VRANGESSZrmikz 6191 -VRANGESSZrri 6192 -VRANGESSZrrib 6193 -VRANGESSZrribk 6194 -VRANGESSZrribkz 6195 -VRANGESSZrrik 6196 -VRANGESSZrrikz 6197 -VRCP 6198 -VRCPBF 6199 -VRCPPHZ 6200 -VRCPPHZm 6201 -VRCPPHZmb 6202 -VRCPPHZmbk 6203 -VRCPPHZmbkz 6204 -VRCPPHZmk 6205 -VRCPPHZmkz 6206 -VRCPPHZr 6207 -VRCPPHZrk 6208 -VRCPPHZrkz 6209 -VRCPPSYm 6210 -VRCPPSYr 6211 -VRCPPSm 6212 -VRCPPSr 6213 -VRCPSHZrm 6214 -VRCPSHZrmk 6215 -VRCPSHZrmkz 6216 -VRCPSHZrr 6217 -VRCPSHZrrk 6218 -VRCPSHZrrkz 6219 -VRCPSSm 6220 -VRCPSSm_Int 6221 -VRCPSSr 6222 -VRCPSSr_Int 6223 -VREDUCEBF 6224 -VREDUCEPDZ 6225 -VREDUCEPDZrmbi 6226 -VREDUCEPDZrmbik 6227 -VREDUCEPDZrmbikz 6228 -VREDUCEPDZrmi 6229 -VREDUCEPDZrmik 6230 -VREDUCEPDZrmikz 6231 -VREDUCEPDZrri 6232 -VREDUCEPDZrrib 6233 -VREDUCEPDZrribk 6234 -VREDUCEPDZrribkz 6235 -VREDUCEPDZrrik 6236 -VREDUCEPDZrrikz 6237 -VREDUCEPHZ 6238 -VREDUCEPHZrmbi 6239 -VREDUCEPHZrmbik 6240 -VREDUCEPHZrmbikz 6241 -VREDUCEPHZrmi 6242 -VREDUCEPHZrmik 6243 -VREDUCEPHZrmikz 6244 -VREDUCEPHZrri 6245 -VREDUCEPHZrrib 6246 -VREDUCEPHZrribk 6247 -VREDUCEPHZrribkz 6248 -VREDUCEPHZrrik 6249 -VREDUCEPHZrrikz 6250 -VREDUCEPSZ 6251 -VREDUCEPSZrmbi 6252 -VREDUCEPSZrmbik 6253 -VREDUCEPSZrmbikz 6254 -VREDUCEPSZrmi 6255 -VREDUCEPSZrmik 6256 -VREDUCEPSZrmikz 6257 -VREDUCEPSZrri 6258 -VREDUCEPSZrrib 6259 -VREDUCEPSZrribk 6260 -VREDUCEPSZrribkz 6261 -VREDUCEPSZrrik 6262 -VREDUCEPSZrrikz 6263 -VREDUCESDZrmi 6264 -VREDUCESDZrmik 6265 -VREDUCESDZrmikz 6266 -VREDUCESDZrri 6267 -VREDUCESDZrrib 6268 -VREDUCESDZrribk 6269 -VREDUCESDZrribkz 6270 -VREDUCESDZrrik 6271 -VREDUCESDZrrikz 6272 -VREDUCESHZrmi 6273 -VREDUCESHZrmik 6274 -VREDUCESHZrmikz 6275 -VREDUCESHZrri 6276 -VREDUCESHZrrib 6277 -VREDUCESHZrribk 6278 -VREDUCESHZrribkz 6279 -VREDUCESHZrrik 6280 -VREDUCESHZrrikz 6281 -VREDUCESSZrmi 6282 -VREDUCESSZrmik 6283 -VREDUCESSZrmikz 6284 -VREDUCESSZrri 6285 -VREDUCESSZrrib 6286 -VREDUCESSZrribk 6287 -VREDUCESSZrribkz 6288 -VREDUCESSZrrik 6289 -VREDUCESSZrrikz 6290 -VRNDSCALEBF 6291 -VRNDSCALEPDZ 6292 -VRNDSCALEPDZrmbi 6293 -VRNDSCALEPDZrmbik 6294 -VRNDSCALEPDZrmbikz 6295 -VRNDSCALEPDZrmi 6296 -VRNDSCALEPDZrmik 6297 -VRNDSCALEPDZrmikz 6298 -VRNDSCALEPDZrri 6299 -VRNDSCALEPDZrrib 6300 -VRNDSCALEPDZrribk 6301 -VRNDSCALEPDZrribkz 6302 -VRNDSCALEPDZrrik 6303 -VRNDSCALEPDZrrikz 6304 -VRNDSCALEPHZ 6305 -VRNDSCALEPHZrmbi 6306 -VRNDSCALEPHZrmbik 6307 -VRNDSCALEPHZrmbikz 6308 -VRNDSCALEPHZrmi 6309 -VRNDSCALEPHZrmik 6310 -VRNDSCALEPHZrmikz 6311 -VRNDSCALEPHZrri 6312 -VRNDSCALEPHZrrib 6313 -VRNDSCALEPHZrribk 6314 -VRNDSCALEPHZrribkz 6315 -VRNDSCALEPHZrrik 6316 -VRNDSCALEPHZrrikz 6317 -VRNDSCALEPSZ 6318 -VRNDSCALEPSZrmbi 6319 -VRNDSCALEPSZrmbik 6320 -VRNDSCALEPSZrmbikz 6321 -VRNDSCALEPSZrmi 6322 -VRNDSCALEPSZrmik 6323 -VRNDSCALEPSZrmikz 6324 -VRNDSCALEPSZrri 6325 -VRNDSCALEPSZrrib 6326 -VRNDSCALEPSZrribk 6327 -VRNDSCALEPSZrribkz 6328 -VRNDSCALEPSZrrik 6329 -VRNDSCALEPSZrrikz 6330 -VRNDSCALESDZrmi 6331 -VRNDSCALESDZrmi_Int 6332 -VRNDSCALESDZrmik_Int 6333 -VRNDSCALESDZrmikz_Int 6334 -VRNDSCALESDZrri 6335 
-VRNDSCALESDZrri_Int 6336 -VRNDSCALESDZrrib_Int 6337 -VRNDSCALESDZrribk_Int 6338 -VRNDSCALESDZrribkz_Int 6339 -VRNDSCALESDZrrik_Int 6340 -VRNDSCALESDZrrikz_Int 6341 -VRNDSCALESHZrmi 6342 -VRNDSCALESHZrmi_Int 6343 -VRNDSCALESHZrmik_Int 6344 -VRNDSCALESHZrmikz_Int 6345 -VRNDSCALESHZrri 6346 -VRNDSCALESHZrri_Int 6347 -VRNDSCALESHZrrib_Int 6348 -VRNDSCALESHZrribk_Int 6349 -VRNDSCALESHZrribkz_Int 6350 -VRNDSCALESHZrrik_Int 6351 -VRNDSCALESHZrrikz_Int 6352 -VRNDSCALESSZrmi 6353 -VRNDSCALESSZrmi_Int 6354 -VRNDSCALESSZrmik_Int 6355 -VRNDSCALESSZrmikz_Int 6356 -VRNDSCALESSZrri 6357 -VRNDSCALESSZrri_Int 6358 -VRNDSCALESSZrrib_Int 6359 -VRNDSCALESSZrribk_Int 6360 -VRNDSCALESSZrribkz_Int 6361 -VRNDSCALESSZrrik_Int 6362 -VRNDSCALESSZrrikz_Int 6363 -VROUNDPDYmi 6364 -VROUNDPDYri 6365 -VROUNDPDmi 6366 -VROUNDPDri 6367 -VROUNDPSYmi 6368 -VROUNDPSYri 6369 -VROUNDPSmi 6370 -VROUNDPSri 6371 -VROUNDSDmi 6372 -VROUNDSDmi_Int 6373 -VROUNDSDri 6374 -VROUNDSDri_Int 6375 -VROUNDSSmi 6376 -VROUNDSSmi_Int 6377 -VROUNDSSri 6378 -VROUNDSSri_Int 6379 -VRSQRT 6380 -VRSQRTBF 6381 -VRSQRTPHZ 6382 -VRSQRTPHZm 6383 -VRSQRTPHZmb 6384 -VRSQRTPHZmbk 6385 -VRSQRTPHZmbkz 6386 -VRSQRTPHZmk 6387 -VRSQRTPHZmkz 6388 -VRSQRTPHZr 6389 -VRSQRTPHZrk 6390 -VRSQRTPHZrkz 6391 -VRSQRTPSYm 6392 -VRSQRTPSYr 6393 -VRSQRTPSm 6394 -VRSQRTPSr 6395 -VRSQRTSHZrm 6396 -VRSQRTSHZrmk 6397 -VRSQRTSHZrmkz 6398 -VRSQRTSHZrr 6399 -VRSQRTSHZrrk 6400 -VRSQRTSHZrrkz 6401 -VRSQRTSSm 6402 -VRSQRTSSm_Int 6403 -VRSQRTSSr 6404 -VRSQRTSSr_Int 6405 -VSCALEFBF 6406 -VSCALEFPDZ 6407 -VSCALEFPDZrm 6408 -VSCALEFPDZrmb 6409 -VSCALEFPDZrmbk 6410 -VSCALEFPDZrmbkz 6411 -VSCALEFPDZrmk 6412 -VSCALEFPDZrmkz 6413 -VSCALEFPDZrr 6414 -VSCALEFPDZrrb 6415 -VSCALEFPDZrrbk 6416 -VSCALEFPDZrrbkz 6417 -VSCALEFPDZrrk 6418 -VSCALEFPDZrrkz 6419 -VSCALEFPHZ 6420 -VSCALEFPHZrm 6421 -VSCALEFPHZrmb 6422 -VSCALEFPHZrmbk 6423 -VSCALEFPHZrmbkz 6424 -VSCALEFPHZrmk 6425 -VSCALEFPHZrmkz 6426 -VSCALEFPHZrr 6427 -VSCALEFPHZrrb 6428 -VSCALEFPHZrrbk 6429 -VSCALEFPHZrrbkz 6430 -VSCALEFPHZrrk 6431 -VSCALEFPHZrrkz 6432 -VSCALEFPSZ 6433 -VSCALEFPSZrm 6434 -VSCALEFPSZrmb 6435 -VSCALEFPSZrmbk 6436 -VSCALEFPSZrmbkz 6437 -VSCALEFPSZrmk 6438 -VSCALEFPSZrmkz 6439 -VSCALEFPSZrr 6440 -VSCALEFPSZrrb 6441 -VSCALEFPSZrrbk 6442 -VSCALEFPSZrrbkz 6443 -VSCALEFPSZrrk 6444 -VSCALEFPSZrrkz 6445 -VSCALEFSDZrm 6446 -VSCALEFSDZrmk 6447 -VSCALEFSDZrmkz 6448 -VSCALEFSDZrr 6449 -VSCALEFSDZrrb_Int 6450 -VSCALEFSDZrrbk_Int 6451 -VSCALEFSDZrrbkz_Int 6452 -VSCALEFSDZrrk 6453 -VSCALEFSDZrrkz 6454 -VSCALEFSHZrm 6455 -VSCALEFSHZrmk 6456 -VSCALEFSHZrmkz 6457 -VSCALEFSHZrr 6458 -VSCALEFSHZrrb_Int 6459 -VSCALEFSHZrrbk_Int 6460 -VSCALEFSHZrrbkz_Int 6461 -VSCALEFSHZrrk 6462 -VSCALEFSHZrrkz 6463 -VSCALEFSSZrm 6464 -VSCALEFSSZrmk 6465 -VSCALEFSSZrmkz 6466 -VSCALEFSSZrr 6467 -VSCALEFSSZrrb_Int 6468 -VSCALEFSSZrrbk_Int 6469 -VSCALEFSSZrrbkz_Int 6470 -VSCALEFSSZrrk 6471 -VSCALEFSSZrrkz 6472 -VSCATTERDPDZ 6473 -VSCATTERDPDZmr 6474 -VSCATTERDPSZ 6475 -VSCATTERDPSZmr 6476 -VSCATTERPF 6477 -VSCATTERQPDZ 6478 -VSCATTERQPDZmr 6479 -VSCATTERQPSZ 6480 -VSCATTERQPSZmr 6481 -VSHA 6482 -VSHUFF 6483 -VSHUFI 6484 -VSHUFPDYrmi 6485 -VSHUFPDYrri 6486 -VSHUFPDZ 6487 -VSHUFPDZrmbi 6488 -VSHUFPDZrmbik 6489 -VSHUFPDZrmbikz 6490 -VSHUFPDZrmi 6491 -VSHUFPDZrmik 6492 -VSHUFPDZrmikz 6493 -VSHUFPDZrri 6494 -VSHUFPDZrrik 6495 -VSHUFPDZrrikz 6496 -VSHUFPDrmi 6497 -VSHUFPDrri 6498 -VSHUFPSYrmi 6499 -VSHUFPSYrri 6500 -VSHUFPSZ 6501 -VSHUFPSZrmbi 6502 -VSHUFPSZrmbik 6503 -VSHUFPSZrmbikz 6504 -VSHUFPSZrmi 6505 -VSHUFPSZrmik 6506 -VSHUFPSZrmikz 6507 -VSHUFPSZrri 6508 
-VSHUFPSZrrik 6509 -VSHUFPSZrrikz 6510 -VSHUFPSrmi 6511 -VSHUFPSrri 6512 -VSM 6513 -VSQRTBF 6514 -VSQRTPDYm 6515 -VSQRTPDYr 6516 -VSQRTPDZ 6517 -VSQRTPDZm 6518 -VSQRTPDZmb 6519 -VSQRTPDZmbk 6520 -VSQRTPDZmbkz 6521 -VSQRTPDZmk 6522 -VSQRTPDZmkz 6523 -VSQRTPDZr 6524 -VSQRTPDZrb 6525 -VSQRTPDZrbk 6526 -VSQRTPDZrbkz 6527 -VSQRTPDZrk 6528 -VSQRTPDZrkz 6529 -VSQRTPDm 6530 -VSQRTPDr 6531 -VSQRTPHZ 6532 -VSQRTPHZm 6533 -VSQRTPHZmb 6534 -VSQRTPHZmbk 6535 -VSQRTPHZmbkz 6536 -VSQRTPHZmk 6537 -VSQRTPHZmkz 6538 -VSQRTPHZr 6539 -VSQRTPHZrb 6540 -VSQRTPHZrbk 6541 -VSQRTPHZrbkz 6542 -VSQRTPHZrk 6543 -VSQRTPHZrkz 6544 -VSQRTPSYm 6545 -VSQRTPSYr 6546 -VSQRTPSZ 6547 -VSQRTPSZm 6548 -VSQRTPSZmb 6549 -VSQRTPSZmbk 6550 -VSQRTPSZmbkz 6551 -VSQRTPSZmk 6552 -VSQRTPSZmkz 6553 -VSQRTPSZr 6554 -VSQRTPSZrb 6555 -VSQRTPSZrbk 6556 -VSQRTPSZrbkz 6557 -VSQRTPSZrk 6558 -VSQRTPSZrkz 6559 -VSQRTPSm 6560 -VSQRTPSr 6561 -VSQRTSDZm 6562 -VSQRTSDZm_Int 6563 -VSQRTSDZmk_Int 6564 -VSQRTSDZmkz_Int 6565 -VSQRTSDZr 6566 -VSQRTSDZr_Int 6567 -VSQRTSDZrb_Int 6568 -VSQRTSDZrbk_Int 6569 -VSQRTSDZrbkz_Int 6570 -VSQRTSDZrk_Int 6571 -VSQRTSDZrkz_Int 6572 -VSQRTSDm 6573 -VSQRTSDm_Int 6574 -VSQRTSDr 6575 -VSQRTSDr_Int 6576 -VSQRTSHZm 6577 -VSQRTSHZm_Int 6578 -VSQRTSHZmk_Int 6579 -VSQRTSHZmkz_Int 6580 -VSQRTSHZr 6581 -VSQRTSHZr_Int 6582 -VSQRTSHZrb_Int 6583 -VSQRTSHZrbk_Int 6584 -VSQRTSHZrbkz_Int 6585 -VSQRTSHZrk_Int 6586 -VSQRTSHZrkz_Int 6587 -VSQRTSSZm 6588 -VSQRTSSZm_Int 6589 -VSQRTSSZmk_Int 6590 -VSQRTSSZmkz_Int 6591 -VSQRTSSZr 6592 -VSQRTSSZr_Int 6593 -VSQRTSSZrb_Int 6594 -VSQRTSSZrbk_Int 6595 -VSQRTSSZrbkz_Int 6596 -VSQRTSSZrk_Int 6597 -VSQRTSSZrkz_Int 6598 -VSQRTSSm 6599 -VSQRTSSm_Int 6600 -VSQRTSSr 6601 -VSQRTSSr_Int 6602 -VSTMXCSR 6603 -VSUBBF 6604 -VSUBPDYrm 6605 -VSUBPDYrr 6606 -VSUBPDZ 6607 -VSUBPDZrm 6608 -VSUBPDZrmb 6609 -VSUBPDZrmbk 6610 -VSUBPDZrmbkz 6611 -VSUBPDZrmk 6612 -VSUBPDZrmkz 6613 -VSUBPDZrr 6614 -VSUBPDZrrb 6615 -VSUBPDZrrbk 6616 -VSUBPDZrrbkz 6617 -VSUBPDZrrk 6618 -VSUBPDZrrkz 6619 -VSUBPDrm 6620 -VSUBPDrr 6621 -VSUBPHZ 6622 -VSUBPHZrm 6623 -VSUBPHZrmb 6624 -VSUBPHZrmbk 6625 -VSUBPHZrmbkz 6626 -VSUBPHZrmk 6627 -VSUBPHZrmkz 6628 -VSUBPHZrr 6629 -VSUBPHZrrb 6630 -VSUBPHZrrbk 6631 -VSUBPHZrrbkz 6632 -VSUBPHZrrk 6633 -VSUBPHZrrkz 6634 -VSUBPSYrm 6635 -VSUBPSYrr 6636 -VSUBPSZ 6637 -VSUBPSZrm 6638 -VSUBPSZrmb 6639 -VSUBPSZrmbk 6640 -VSUBPSZrmbkz 6641 -VSUBPSZrmk 6642 -VSUBPSZrmkz 6643 -VSUBPSZrr 6644 -VSUBPSZrrb 6645 -VSUBPSZrrbk 6646 -VSUBPSZrrbkz 6647 -VSUBPSZrrk 6648 -VSUBPSZrrkz 6649 -VSUBPSrm 6650 -VSUBPSrr 6651 -VSUBSDZrm 6652 -VSUBSDZrm_Int 6653 -VSUBSDZrmk_Int 6654 -VSUBSDZrmkz_Int 6655 -VSUBSDZrr 6656 -VSUBSDZrr_Int 6657 -VSUBSDZrrb_Int 6658 -VSUBSDZrrbk_Int 6659 -VSUBSDZrrbkz_Int 6660 -VSUBSDZrrk_Int 6661 -VSUBSDZrrkz_Int 6662 -VSUBSDrm 6663 -VSUBSDrm_Int 6664 -VSUBSDrr 6665 -VSUBSDrr_Int 6666 -VSUBSHZrm 6667 -VSUBSHZrm_Int 6668 -VSUBSHZrmk_Int 6669 -VSUBSHZrmkz_Int 6670 -VSUBSHZrr 6671 -VSUBSHZrr_Int 6672 -VSUBSHZrrb_Int 6673 -VSUBSHZrrbk_Int 6674 -VSUBSHZrrbkz_Int 6675 -VSUBSHZrrk_Int 6676 -VSUBSHZrrkz_Int 6677 -VSUBSSZrm 6678 -VSUBSSZrm_Int 6679 -VSUBSSZrmk_Int 6680 -VSUBSSZrmkz_Int 6681 -VSUBSSZrr 6682 -VSUBSSZrr_Int 6683 -VSUBSSZrrb_Int 6684 -VSUBSSZrrbk_Int 6685 -VSUBSSZrrbkz_Int 6686 -VSUBSSZrrk_Int 6687 -VSUBSSZrrkz_Int 6688 -VSUBSSrm 6689 -VSUBSSrm_Int 6690 -VSUBSSrr 6691 -VSUBSSrr_Int 6692 -VTESTPDYrm 6693 -VTESTPDYrr 6694 -VTESTPDrm 6695 -VTESTPDrr 6696 -VTESTPSYrm 6697 -VTESTPSYrr 6698 -VTESTPSrm 6699 -VTESTPSrr 6700 -VUCOMISDZrm 6701 -VUCOMISDZrm_Int 6702 -VUCOMISDZrr 6703 -VUCOMISDZrr_Int 6704 
-VUCOMISDZrrb 6705 -VUCOMISDrm 6706 -VUCOMISDrm_Int 6707 -VUCOMISDrr 6708 -VUCOMISDrr_Int 6709 -VUCOMISHZrm 6710 -VUCOMISHZrm_Int 6711 -VUCOMISHZrr 6712 -VUCOMISHZrr_Int 6713 -VUCOMISHZrrb 6714 -VUCOMISSZrm 6715 -VUCOMISSZrm_Int 6716 -VUCOMISSZrr 6717 -VUCOMISSZrr_Int 6718 -VUCOMISSZrrb 6719 -VUCOMISSrm 6720 -VUCOMISSrm_Int 6721 -VUCOMISSrr 6722 -VUCOMISSrr_Int 6723 -VUCOMXSDZrm 6724 -VUCOMXSDZrm_Int 6725 -VUCOMXSDZrr 6726 -VUCOMXSDZrr_Int 6727 -VUCOMXSDZrrb_Int 6728 -VUCOMXSHZrm 6729 -VUCOMXSHZrm_Int 6730 -VUCOMXSHZrr 6731 -VUCOMXSHZrr_Int 6732 -VUCOMXSHZrrb_Int 6733 -VUCOMXSSZrm 6734 -VUCOMXSSZrm_Int 6735 -VUCOMXSSZrr 6736 -VUCOMXSSZrr_Int 6737 -VUCOMXSSZrrb_Int 6738 -VUNPCKHPDYrm 6739 -VUNPCKHPDYrr 6740 -VUNPCKHPDZ 6741 -VUNPCKHPDZrm 6742 -VUNPCKHPDZrmb 6743 -VUNPCKHPDZrmbk 6744 -VUNPCKHPDZrmbkz 6745 -VUNPCKHPDZrmk 6746 -VUNPCKHPDZrmkz 6747 -VUNPCKHPDZrr 6748 -VUNPCKHPDZrrk 6749 -VUNPCKHPDZrrkz 6750 -VUNPCKHPDrm 6751 -VUNPCKHPDrr 6752 -VUNPCKHPSYrm 6753 -VUNPCKHPSYrr 6754 -VUNPCKHPSZ 6755 -VUNPCKHPSZrm 6756 -VUNPCKHPSZrmb 6757 -VUNPCKHPSZrmbk 6758 -VUNPCKHPSZrmbkz 6759 -VUNPCKHPSZrmk 6760 -VUNPCKHPSZrmkz 6761 -VUNPCKHPSZrr 6762 -VUNPCKHPSZrrk 6763 -VUNPCKHPSZrrkz 6764 -VUNPCKHPSrm 6765 -VUNPCKHPSrr 6766 -VUNPCKLPDYrm 6767 -VUNPCKLPDYrr 6768 -VUNPCKLPDZ 6769 -VUNPCKLPDZrm 6770 -VUNPCKLPDZrmb 6771 -VUNPCKLPDZrmbk 6772 -VUNPCKLPDZrmbkz 6773 -VUNPCKLPDZrmk 6774 -VUNPCKLPDZrmkz 6775 -VUNPCKLPDZrr 6776 -VUNPCKLPDZrrk 6777 -VUNPCKLPDZrrkz 6778 -VUNPCKLPDrm 6779 -VUNPCKLPDrr 6780 -VUNPCKLPSYrm 6781 -VUNPCKLPSYrr 6782 -VUNPCKLPSZ 6783 -VUNPCKLPSZrm 6784 -VUNPCKLPSZrmb 6785 -VUNPCKLPSZrmbk 6786 -VUNPCKLPSZrmbkz 6787 -VUNPCKLPSZrmk 6788 -VUNPCKLPSZrmkz 6789 -VUNPCKLPSZrr 6790 -VUNPCKLPSZrrk 6791 -VUNPCKLPSZrrkz 6792 -VUNPCKLPSrm 6793 -VUNPCKLPSrr 6794 -VXORPDYrm 6795 -VXORPDYrr 6796 -VXORPDZ 6797 -VXORPDZrm 6798 -VXORPDZrmb 6799 -VXORPDZrmbk 6800 -VXORPDZrmbkz 6801 -VXORPDZrmk 6802 -VXORPDZrmkz 6803 -VXORPDZrr 6804 -VXORPDZrrk 6805 -VXORPDZrrkz 6806 -VXORPDrm 6807 -VXORPDrr 6808 -VXORPSYrm 6809 -VXORPSYrr 6810 -VXORPSZ 6811 -VXORPSZrm 6812 -VXORPSZrmb 6813 -VXORPSZrmbk 6814 -VXORPSZrmbkz 6815 -VXORPSZrmk 6816 -VXORPSZrmkz 6817 -VXORPSZrr 6818 -VXORPSZrrk 6819 -VXORPSZrrkz 6820 -VXORPSrm 6821 -VXORPSrr 6822 -VZEROALL 6823 -VZEROUPPER 6824 -V_SET 6825 -V_SETALLONES 6826 -WAIT 6827 -WBINVD 6828 -WBNOINVD 6829 -WRFLAGS 6830 -WRFSBASE 6831 -WRGSBASE 6832 -WRMSR 6833 -WRMSRLIST 6834 -WRMSRNS 6835 -WRMSRNSir 6836 -WRMSRNSir_EVEX 6837 -WRPKRUr 6838 -WRSSD 6839 -WRSSD_EVEX 6840 -WRSSQ 6841 -WRSSQ_EVEX 6842 -WRUSSD 6843 -WRUSSD_EVEX 6844 -WRUSSQ 6845 -WRUSSQ_EVEX 6846 -XABORT 6847 -XABORT_DEF 6848 -XACQUIRE_PREFIX 6849 -XADD 6850 -XAM_F 6851 -XAM_Fp 6852 -XBEGIN 6853 -XCHG 6854 -XCH_F 6855 -XCRYPTCBC 6856 -XCRYPTCFB 6857 -XCRYPTCTR 6858 -XCRYPTECB 6859 -XCRYPTOFB 6860 -XEND 6861 -XGETBV 6862 -XLAT 6863 -XOR 6864 -XORPDrm 6865 -XORPDrr 6866 -XORPSrm 6867 -XORPSrr 6868 -XRELEASE_PREFIX 6869 -XRESLDTRK 6870 -XRSTOR 6871 -XRSTORS 6872 -XSAVE 6873 -XSAVEC 6874 -XSAVEOPT 6875 -XSAVES 6876 -XSETBV 6877 -XSHA 6878 -XSTORE 6879 -XSUSLDTRK 6880 -XTEST 6881 -Immediate 6882 -CImmediate 6883 -FPImmediate 6884 -MBB 6885 -FrameIndex 6886 -ConstantPoolIndex 6887 -TargetIndex 6888 -JumpTableIndex 6889 -ExternalSymbol 6890 -GlobalAddress 6891 -BlockAddress 6892 -RegisterMask 6893 -RegisterLiveOut 6894 -Metadata 6895 -MCSymbol 6896 -CFIIndex 6897 -IntrinsicID 6898 -Predicate 6899 -ShuffleMask 6900 -PhyReg_GR8 6901 -PhyReg_GRH8 6902 -PhyReg_GR8_NOREX2 6903 -PhyReg_GR8_NOREX 6904 -PhyReg_GR8_ABCD_H 6905 -PhyReg_GR8_ABCD_L 6906 
-PhyReg_GRH16 6907 -PhyReg_GR16 6908 -PhyReg_GR16_NOREX2 6909 -PhyReg_GR16_NOREX 6910 -PhyReg_VK1 6911 -PhyReg_VK16 6912 -PhyReg_VK2 6913 -PhyReg_VK4 6914 -PhyReg_VK8 6915 -PhyReg_VK16WM 6916 -PhyReg_VK1WM 6917 -PhyReg_VK2WM 6918 -PhyReg_VK4WM 6919 -PhyReg_VK8WM 6920 -PhyReg_SEGMENT_REG 6921 -PhyReg_GR16_ABCD 6922 -PhyReg_FPCCR 6923 -PhyReg_FR16X 6924 -PhyReg_FR16 6925 -PhyReg_VK16PAIR 6926 -PhyReg_VK1PAIR 6927 -PhyReg_VK2PAIR 6928 -PhyReg_VK4PAIR 6929 -PhyReg_VK8PAIR 6930 -PhyReg_VK1PAIR_with_sub_mask_0_in_VK1WM 6931 -PhyReg_LOW32_ADDR_ACCESS_RBP 6932 -PhyReg_LOW32_ADDR_ACCESS 6933 -PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit 6934 -PhyReg_FR32X 6935 -PhyReg_GR32 6936 -PhyReg_GR32_NOSP 6937 -PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX2 6938 -PhyReg_DEBUG_REG 6939 -PhyReg_FR32 6940 -PhyReg_GR32_NOREX2 6941 -PhyReg_GR32_NOREX2_NOSP 6942 -PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX 6943 -PhyReg_GR32_NOREX 6944 -PhyReg_VK32 6945 -PhyReg_GR32_NOREX_NOSP 6946 -PhyReg_RFP32 6947 -PhyReg_VK32WM 6948 -PhyReg_GR32_ABCD 6949 -PhyReg_GR32_TC 6950 -PhyReg_GR32_ABCD_and_GR32_TC 6951 -PhyReg_GR32_AD 6952 -PhyReg_GR32_ArgRef 6953 -PhyReg_GR32_BPSP 6954 -PhyReg_GR32_BSI 6955 -PhyReg_GR32_CB 6956 -PhyReg_GR32_DC 6957 -PhyReg_GR32_DIBP 6958 -PhyReg_GR32_SIDI 6959 -PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_32bit 6960 -PhyReg_CCR 6961 -PhyReg_DFCCR 6962 -PhyReg_GR32_ABCD_and_GR32_BSI 6963 -PhyReg_GR32_AD_and_GR32_ArgRef 6964 -PhyReg_GR32_ArgRef_and_GR32_CB 6965 -PhyReg_GR32_BPSP_and_GR32_DIBP 6966 -PhyReg_GR32_BPSP_and_GR32_TC 6967 -PhyReg_GR32_BSI_and_GR32_SIDI 6968 -PhyReg_GR32_DIBP_and_GR32_SIDI 6969 -PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit 6970 -PhyReg_LOW32_ADDR_ACCESS_with_sub_32bit 6971 -PhyReg_RFP64 6972 -PhyReg_GR64 6973 -PhyReg_FR64X 6974 -PhyReg_GR64_with_sub_8bit 6975 -PhyReg_GR64_NOSP 6976 -PhyReg_GR64_NOREX2 6977 -PhyReg_CONTROL_REG 6978 -PhyReg_FR64 6979 -PhyReg_GR64_with_sub_16bit_in_GR16_NOREX2 6980 -PhyReg_GR64_NOREX2_NOSP 6981 -PhyReg_GR64PLTSafe 6982 -PhyReg_GR64_TC 6983 -PhyReg_GR64_NOREX 6984 -PhyReg_GR64_TCW64 6985 -PhyReg_GR64_TC_with_sub_8bit 6986 -PhyReg_GR64_NOREX2_NOSP_and_GR64_TC 6987 -PhyReg_GR64_TCW64_with_sub_8bit 6988 -PhyReg_GR64_TC_and_GR64_TCW64 6989 -PhyReg_GR64_with_sub_16bit_in_GR16_NOREX 6990 -PhyReg_VK64 6991 -PhyReg_VR64 6992 -PhyReg_GR64PLTSafe_and_GR64_TC 6993 -PhyReg_GR64_NOREX2_NOSP_and_GR64_TCW64 6994 -PhyReg_GR64_NOREX_NOSP 6995 -PhyReg_GR64_NOREX_and_GR64_TC 6996 -PhyReg_GR64_TCW64_and_GR64_TC_with_sub_8bit 6997 -PhyReg_VK64WM 6998 -PhyReg_GR64_TC_and_GR64_NOREX2_NOSP_and_GR64_TCW64 6999 -PhyReg_GR64_TC_and_GR64_with_sub_16bit_in_GR16_NOREX 7000 -PhyReg_GR64PLTSafe_and_GR64_TCW64 7001 -PhyReg_GR64_NOREX_and_GR64PLTSafe_and_GR64_TC 7002 -PhyReg_GR64_NOREX_and_GR64_TCW64 7003 -PhyReg_GR64_ABCD 7004 -PhyReg_GR64_with_sub_32bit_in_GR32_TC 7005 -PhyReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_TC 7006 -PhyReg_GR64_AD 7007 -PhyReg_GR64_ArgRef 7008 -PhyReg_GR64_and_LOW32_ADDR_ACCESS_RBP 7009 -PhyReg_GR64_with_sub_32bit_in_GR32_ArgRef 7010 -PhyReg_GR64_with_sub_32bit_in_GR32_BPSP 7011 -PhyReg_GR64_with_sub_32bit_in_GR32_BSI 7012 -PhyReg_GR64_with_sub_32bit_in_GR32_CB 7013 -PhyReg_GR64_with_sub_32bit_in_GR32_DIBP 7014 -PhyReg_GR64_with_sub_32bit_in_GR32_SIDI 7015 -PhyReg_GR64_A 7016 -PhyReg_GR64_ArgRef_and_GR64_TC 7017 -PhyReg_GR64_and_LOW32_ADDR_ACCESS 7018 -PhyReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_BSI 7019 -PhyReg_GR64_with_sub_32bit_in_GR32_AD_and_GR32_ArgRef 7020 -PhyReg_GR64_with_sub_32bit_in_GR32_ArgRef_and_GR32_CB 
7021 -PhyReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_DIBP 7022 -PhyReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_TC 7023 -PhyReg_GR64_with_sub_32bit_in_GR32_BSI_and_GR32_SIDI 7024 -PhyReg_GR64_with_sub_32bit_in_GR32_DIBP_and_GR32_SIDI 7025 -PhyReg_RST 7026 -PhyReg_RFP80 7027 -PhyReg_RFP80_7 7028 -PhyReg_VR128X 7029 -PhyReg_VR128 7030 -PhyReg_VR256X 7031 -PhyReg_VR256 7032 -PhyReg_VR512 7033 -PhyReg_VR512_0_15 7034 -PhyReg_TILE 7035 -PhyReg_TILEPAIR 7036 -VirtReg_GR8 7037 -VirtReg_GRH8 7038 -VirtReg_GR8_NOREX2 7039 -VirtReg_GR8_NOREX 7040 -VirtReg_GR8_ABCD_H 7041 -VirtReg_GR8_ABCD_L 7042 -VirtReg_GRH16 7043 -VirtReg_GR16 7044 -VirtReg_GR16_NOREX2 7045 -VirtReg_GR16_NOREX 7046 -VirtReg_VK1 7047 -VirtReg_VK16 7048 -VirtReg_VK2 7049 -VirtReg_VK4 7050 -VirtReg_VK8 7051 -VirtReg_VK16WM 7052 -VirtReg_VK1WM 7053 -VirtReg_VK2WM 7054 -VirtReg_VK4WM 7055 -VirtReg_VK8WM 7056 -VirtReg_SEGMENT_REG 7057 -VirtReg_GR16_ABCD 7058 -VirtReg_FPCCR 7059 -VirtReg_FR16X 7060 -VirtReg_FR16 7061 -VirtReg_VK16PAIR 7062 -VirtReg_VK1PAIR 7063 -VirtReg_VK2PAIR 7064 -VirtReg_VK4PAIR 7065 -VirtReg_VK8PAIR 7066 -VirtReg_VK1PAIR_with_sub_mask_0_in_VK1WM 7067 -VirtReg_LOW32_ADDR_ACCESS_RBP 7068 -VirtReg_LOW32_ADDR_ACCESS 7069 -VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit 7070 -VirtReg_FR32X 7071 -VirtReg_GR32 7072 -VirtReg_GR32_NOSP 7073 -VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX2 7074 -VirtReg_DEBUG_REG 7075 -VirtReg_FR32 7076 -VirtReg_GR32_NOREX2 7077 -VirtReg_GR32_NOREX2_NOSP 7078 -VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX 7079 -VirtReg_GR32_NOREX 7080 -VirtReg_VK32 7081 -VirtReg_GR32_NOREX_NOSP 7082 -VirtReg_RFP32 7083 -VirtReg_VK32WM 7084 -VirtReg_GR32_ABCD 7085 -VirtReg_GR32_TC 7086 -VirtReg_GR32_ABCD_and_GR32_TC 7087 -VirtReg_GR32_AD 7088 -VirtReg_GR32_ArgRef 7089 -VirtReg_GR32_BPSP 7090 -VirtReg_GR32_BSI 7091 -VirtReg_GR32_CB 7092 -VirtReg_GR32_DC 7093 -VirtReg_GR32_DIBP 7094 -VirtReg_GR32_SIDI 7095 -VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_32bit 7096 -VirtReg_CCR 7097 -VirtReg_DFCCR 7098 -VirtReg_GR32_ABCD_and_GR32_BSI 7099 -VirtReg_GR32_AD_and_GR32_ArgRef 7100 -VirtReg_GR32_ArgRef_and_GR32_CB 7101 -VirtReg_GR32_BPSP_and_GR32_DIBP 7102 -VirtReg_GR32_BPSP_and_GR32_TC 7103 -VirtReg_GR32_BSI_and_GR32_SIDI 7104 -VirtReg_GR32_DIBP_and_GR32_SIDI 7105 -VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit 7106 -VirtReg_LOW32_ADDR_ACCESS_with_sub_32bit 7107 -VirtReg_RFP64 7108 -VirtReg_GR64 7109 -VirtReg_FR64X 7110 -VirtReg_GR64_with_sub_8bit 7111 -VirtReg_GR64_NOSP 7112 -VirtReg_GR64_NOREX2 7113 -VirtReg_CONTROL_REG 7114 -VirtReg_FR64 7115 -VirtReg_GR64_with_sub_16bit_in_GR16_NOREX2 7116 -VirtReg_GR64_NOREX2_NOSP 7117 -VirtReg_GR64PLTSafe 7118 -VirtReg_GR64_TC 7119 -VirtReg_GR64_NOREX 7120 -VirtReg_GR64_TCW64 7121 -VirtReg_GR64_TC_with_sub_8bit 7122 -VirtReg_GR64_NOREX2_NOSP_and_GR64_TC 7123 -VirtReg_GR64_TCW64_with_sub_8bit 7124 -VirtReg_GR64_TC_and_GR64_TCW64 7125 -VirtReg_GR64_with_sub_16bit_in_GR16_NOREX 7126 -VirtReg_VK64 7127 -VirtReg_VR64 7128 -VirtReg_GR64PLTSafe_and_GR64_TC 7129 -VirtReg_GR64_NOREX2_NOSP_and_GR64_TCW64 7130 -VirtReg_GR64_NOREX_NOSP 7131 -VirtReg_GR64_NOREX_and_GR64_TC 7132 -VirtReg_GR64_TCW64_and_GR64_TC_with_sub_8bit 7133 -VirtReg_VK64WM 7134 -VirtReg_GR64_TC_and_GR64_NOREX2_NOSP_and_GR64_TCW64 7135 -VirtReg_GR64_TC_and_GR64_with_sub_16bit_in_GR16_NOREX 7136 -VirtReg_GR64PLTSafe_and_GR64_TCW64 7137 -VirtReg_GR64_NOREX_and_GR64PLTSafe_and_GR64_TC 7138 -VirtReg_GR64_NOREX_and_GR64_TCW64 7139 -VirtReg_GR64_ABCD 7140 -VirtReg_GR64_with_sub_32bit_in_GR32_TC 7141 
-VirtReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_TC 7142 -VirtReg_GR64_AD 7143 -VirtReg_GR64_ArgRef 7144 -VirtReg_GR64_and_LOW32_ADDR_ACCESS_RBP 7145 -VirtReg_GR64_with_sub_32bit_in_GR32_ArgRef 7146 -VirtReg_GR64_with_sub_32bit_in_GR32_BPSP 7147 -VirtReg_GR64_with_sub_32bit_in_GR32_BSI 7148 -VirtReg_GR64_with_sub_32bit_in_GR32_CB 7149 -VirtReg_GR64_with_sub_32bit_in_GR32_DIBP 7150 -VirtReg_GR64_with_sub_32bit_in_GR32_SIDI 7151 -VirtReg_GR64_A 7152 -VirtReg_GR64_ArgRef_and_GR64_TC 7153 -VirtReg_GR64_and_LOW32_ADDR_ACCESS 7154 -VirtReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_BSI 7155 -VirtReg_GR64_with_sub_32bit_in_GR32_AD_and_GR32_ArgRef 7156 -VirtReg_GR64_with_sub_32bit_in_GR32_ArgRef_and_GR32_CB 7157 -VirtReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_DIBP 7158 -VirtReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_TC 7159 -VirtReg_GR64_with_sub_32bit_in_GR32_BSI_and_GR32_SIDI 7160 -VirtReg_GR64_with_sub_32bit_in_GR32_DIBP_and_GR32_SIDI 7161 -VirtReg_RST 7162 -VirtReg_RFP80 7163 -VirtReg_RFP80_7 7164 -VirtReg_VR128X 7165 -VirtReg_VR128 7166 -VirtReg_VR256X 7167 -VirtReg_VR256 7168 -VirtReg_VR512 7169 -VirtReg_VR512_0_15 7170 -VirtReg_TILE 7171 -VirtReg_TILEPAIR 7172 +PTCMMIMFP 1441 +PTCMMRLFP 1442 +PTCVTROWD 1443 +PTCVTROWPS 1444 +PTDPBF 1445 +PTDPBHF 1446 +PTDPBSSD 1447 +PTDPBSSDV 1448 +PTDPBSUD 1449 +PTDPBSUDV 1450 +PTDPBUSD 1451 +PTDPBUSDV 1452 +PTDPBUUD 1453 +PTDPBUUDV 1454 +PTDPFP 1455 +PTDPHBF 1456 +PTDPHF 1457 +PTESTrm 1458 +PTESTrr 1459 +PTILELOADD 1460 +PTILELOADDRS 1461 +PTILELOADDRST 1462 +PTILELOADDRSV 1463 +PTILELOADDT 1464 +PTILELOADDV 1465 +PTILEMOVROWrre 1466 +PTILEMOVROWrreV 1467 +PTILEMOVROWrri 1468 +PTILEMOVROWrriV 1469 +PTILESTORED 1470 +PTILESTOREDV 1471 +PTILEZERO 1472 +PTILEZEROV 1473 +PTMMULTF 1474 +PTWRITE 1475 +PTWRITEm 1476 +PTWRITEr 1477 +PUNPCKHBWrm 1478 +PUNPCKHBWrr 1479 +PUNPCKHDQrm 1480 +PUNPCKHDQrr 1481 +PUNPCKHQDQrm 1482 +PUNPCKHQDQrr 1483 +PUNPCKHWDrm 1484 +PUNPCKHWDrr 1485 +PUNPCKLBWrm 1486 +PUNPCKLBWrr 1487 +PUNPCKLDQrm 1488 +PUNPCKLDQrr 1489 +PUNPCKLQDQrm 1490 +PUNPCKLQDQrr 1491 +PUNPCKLWDrm 1492 +PUNPCKLWDrr 1493 +PUSH 1494 +PUSHA 1495 +PUSHCS 1496 +PUSHDS 1497 +PUSHES 1498 +PUSHF 1499 +PUSHFS 1500 +PUSHGS 1501 +PUSHP 1502 +PUSHSS 1503 +PVALIDATE 1504 +PXORrm 1505 +PXORrr 1506 +RCL 1507 +RCPPSm 1508 +RCPPSr 1509 +RCPSSm 1510 +RCPSSm_Int 1511 +RCPSSr 1512 +RCPSSr_Int 1513 +RCR 1514 +RDFLAGS 1515 +RDFSBASE 1516 +RDGSBASE 1517 +RDMSR 1518 +RDMSRLIST 1519 +RDMSRri 1520 +RDMSRri_EVEX 1521 +RDPID 1522 +RDPKRUr 1523 +RDPMC 1524 +RDPRU 1525 +RDRAND 1526 +RDSEED 1527 +RDSSPD 1528 +RDSSPQ 1529 +RDTSC 1530 +RDTSCP 1531 +REG_SEQUENCE 1532 +REPNE_PREFIX 1533 +REP_MOVSB 1534 +REP_MOVSD 1535 +REP_MOVSQ 1536 +REP_MOVSW 1537 +REP_PREFIX 1538 +REP_STOSB 1539 +REP_STOSD 1540 +REP_STOSQ 1541 +REP_STOSW 1542 +RET 1543 +RETI 1544 +REX 1545 +RMPADJUST 1546 +RMPQUERY 1547 +RMPUPDATE 1548 +ROL 1549 +ROR 1550 +RORX 1551 +ROUNDPDmi 1552 +ROUNDPDri 1553 +ROUNDPSmi 1554 +ROUNDPSri 1555 +ROUNDSDmi 1556 +ROUNDSDmi_Int 1557 +ROUNDSDri 1558 +ROUNDSDri_Int 1559 +ROUNDSSmi 1560 +ROUNDSSmi_Int 1561 +ROUNDSSri 1562 +ROUNDSSri_Int 1563 +RSM 1564 +RSQRTPSm 1565 +RSQRTPSr 1566 +RSQRTSSm 1567 +RSQRTSSm_Int 1568 +RSQRTSSr 1569 +RSQRTSSr_Int 1570 +RSTORSSP 1571 +SAHF 1572 +SALC 1573 +SAR 1574 +SARX 1575 +SAVEPREVSSP 1576 +SBB 1577 +SCASB 1578 +SCASL 1579 +SCASQ 1580 +SCASW 1581 +SEAMCALL 1582 +SEAMOPS 1583 +SEAMRET 1584 +SEG_ALLOCA 1585 +SEH_BeginEpilogue 1586 +SEH_EndEpilogue 1587 +SEH_EndPrologue 1588 +SEH_PushFrame 1589 +SEH_PushReg 1590 +SEH_SaveReg 1591 +SEH_SaveXMM 1592 +SEH_SetFrame 
1593 +SEH_StackAlign 1594 +SEH_StackAlloc 1595 +SEH_UnwindV 1596 +SEH_UnwindVersion 1597 +SENDUIPI 1598 +SERIALIZE 1599 +SETB_C 1600 +SETCCm 1601 +SETCCm_EVEX 1602 +SETCCr 1603 +SETCCr_EVEX 1604 +SETSSBSY 1605 +SETZUCCm 1606 +SETZUCCr 1607 +SFENCE 1608 +SGDT 1609 +SHA 1610 +SHL 1611 +SHLD 1612 +SHLDROT 1613 +SHLX 1614 +SHR 1615 +SHRD 1616 +SHRDROT 1617 +SHRX 1618 +SHUFPDrmi 1619 +SHUFPDrri 1620 +SHUFPSrmi 1621 +SHUFPSrri 1622 +SIDT 1623 +SKINIT 1624 +SLDT 1625 +SLWPCB 1626 +SMSW 1627 +SQRTPDm 1628 +SQRTPDr 1629 +SQRTPSm 1630 +SQRTPSr 1631 +SQRTSDm 1632 +SQRTSDm_Int 1633 +SQRTSDr 1634 +SQRTSDr_Int 1635 +SQRTSSm 1636 +SQRTSSm_Int 1637 +SQRTSSr 1638 +SQRTSSr_Int 1639 +SQRT_F 1640 +SQRT_Fp 1641 +SS_PREFIX 1642 +STAC 1643 +STACKALLOC_W_PROBING 1644 +STACKMAP 1645 +STATEPOINT 1646 +STC 1647 +STD 1648 +STGI 1649 +STI 1650 +STMXCSR 1651 +STOSB 1652 +STOSL 1653 +STOSQ 1654 +STOSW 1655 +STR 1656 +STRm 1657 +STTILECFG 1658 +STTILECFG_EVEX 1659 +STUI 1660 +ST_F 1661 +ST_FP 1662 +ST_FPrr 1663 +ST_Fp 1664 +ST_FpP 1665 +ST_Frr 1666 +SUB 1667 +SUBPDrm 1668 +SUBPDrr 1669 +SUBPSrm 1670 +SUBPSrr 1671 +SUBREG_TO_REG 1672 +SUBR_F 1673 +SUBR_FI 1674 +SUBR_FPrST 1675 +SUBR_FST 1676 +SUBR_Fp 1677 +SUBR_FpI 1678 +SUBR_FrST 1679 +SUBSDrm 1680 +SUBSDrm_Int 1681 +SUBSDrr 1682 +SUBSDrr_Int 1683 +SUBSSrm 1684 +SUBSSrm_Int 1685 +SUBSSrr 1686 +SUBSSrr_Int 1687 +SUB_F 1688 +SUB_FI 1689 +SUB_FPrST 1690 +SUB_FST 1691 +SUB_Fp 1692 +SUB_FpI 1693 +SUB_FrST 1694 +SWAPGS 1695 +SYSCALL 1696 +SYSENTER 1697 +SYSEXIT 1698 +SYSRET 1699 +T 1700 +TAILJMPd 1701 +TAILJMPd_CC 1702 +TAILJMPm 1703 +TAILJMPr 1704 +TCMMIMFP 1705 +TCMMRLFP 1706 +TCRETURN_HIPE 1707 +TCRETURN_WIN 1708 +TCRETURN_WINmi 1709 +TCRETURNdi 1710 +TCRETURNdicc 1711 +TCRETURNmi 1712 +TCRETURNri 1713 +TCVTROWD 1714 +TCVTROWPS 1715 +TDCALL 1716 +TDPBF 1717 +TDPBHF 1718 +TDPBSSD 1719 +TDPBSUD 1720 +TDPBUSD 1721 +TDPBUUD 1722 +TDPFP 1723 +TDPHBF 1724 +TDPHF 1725 +TEST 1726 +TESTUI 1727 +TILELOADD 1728 +TILELOADDRS 1729 +TILELOADDRST 1730 +TILELOADDRS_EVEX 1731 +TILELOADDT 1732 +TILELOADD_EVEX 1733 +TILEMOVROWrre 1734 +TILEMOVROWrri 1735 +TILERELEASE 1736 +TILESTORED 1737 +TILESTORED_EVEX 1738 +TILEZERO 1739 +TLBSYNC 1740 +TLSCall 1741 +TLS_addr 1742 +TLS_addrX 1743 +TLS_base_addr 1744 +TLS_base_addrX 1745 +TLS_desc 1746 +TMMULTF 1747 +TPAUSE 1748 +TRAP 1749 +TST_F 1750 +TST_Fp 1751 +TZCNT 1752 +TZMSK 1753 +UBSAN_UD 1754 +UCOMISDrm 1755 +UCOMISDrm_Int 1756 +UCOMISDrr 1757 +UCOMISDrr_Int 1758 +UCOMISSrm 1759 +UCOMISSrm_Int 1760 +UCOMISSrr 1761 +UCOMISSrr_Int 1762 +UCOM_FIPr 1763 +UCOM_FIr 1764 +UCOM_FPPr 1765 +UCOM_FPr 1766 +UCOM_FpIr 1767 +UCOM_Fpr 1768 +UCOM_Fr 1769 +UD 1770 +UIRET 1771 +UMONITOR 1772 +UMWAIT 1773 +UNPCKHPDrm 1774 +UNPCKHPDrr 1775 +UNPCKHPSrm 1776 +UNPCKHPSrr 1777 +UNPCKLPDrm 1778 +UNPCKLPDrr 1779 +UNPCKLPSrm 1780 +UNPCKLPSrr 1781 +URDMSRri 1782 +URDMSRri_EVEX 1783 +URDMSRrr 1784 +URDMSRrr_EVEX 1785 +UWRMSRir 1786 +UWRMSRir_EVEX 1787 +UWRMSRrr 1788 +UWRMSRrr_EVEX 1789 +V 1790 +VAARG 1791 +VAARG_X 1792 +VADDBF 1793 +VADDPDYrm 1794 +VADDPDYrr 1795 +VADDPDZ 1796 +VADDPDZrm 1797 +VADDPDZrmb 1798 +VADDPDZrmbk 1799 +VADDPDZrmbkz 1800 +VADDPDZrmk 1801 +VADDPDZrmkz 1802 +VADDPDZrr 1803 +VADDPDZrrb 1804 +VADDPDZrrbk 1805 +VADDPDZrrbkz 1806 +VADDPDZrrk 1807 +VADDPDZrrkz 1808 +VADDPDrm 1809 +VADDPDrr 1810 +VADDPHZ 1811 +VADDPHZrm 1812 +VADDPHZrmb 1813 +VADDPHZrmbk 1814 +VADDPHZrmbkz 1815 +VADDPHZrmk 1816 +VADDPHZrmkz 1817 +VADDPHZrr 1818 +VADDPHZrrb 1819 +VADDPHZrrbk 1820 +VADDPHZrrbkz 1821 +VADDPHZrrk 1822 +VADDPHZrrkz 1823 +VADDPSYrm 1824 +VADDPSYrr 1825 +VADDPSZ 
1826 +VADDPSZrm 1827 +VADDPSZrmb 1828 +VADDPSZrmbk 1829 +VADDPSZrmbkz 1830 +VADDPSZrmk 1831 +VADDPSZrmkz 1832 +VADDPSZrr 1833 +VADDPSZrrb 1834 +VADDPSZrrbk 1835 +VADDPSZrrbkz 1836 +VADDPSZrrk 1837 +VADDPSZrrkz 1838 +VADDPSrm 1839 +VADDPSrr 1840 +VADDSDZrm 1841 +VADDSDZrm_Int 1842 +VADDSDZrmk_Int 1843 +VADDSDZrmkz_Int 1844 +VADDSDZrr 1845 +VADDSDZrr_Int 1846 +VADDSDZrrb_Int 1847 +VADDSDZrrbk_Int 1848 +VADDSDZrrbkz_Int 1849 +VADDSDZrrk_Int 1850 +VADDSDZrrkz_Int 1851 +VADDSDrm 1852 +VADDSDrm_Int 1853 +VADDSDrr 1854 +VADDSDrr_Int 1855 +VADDSHZrm 1856 +VADDSHZrm_Int 1857 +VADDSHZrmk_Int 1858 +VADDSHZrmkz_Int 1859 +VADDSHZrr 1860 +VADDSHZrr_Int 1861 +VADDSHZrrb_Int 1862 +VADDSHZrrbk_Int 1863 +VADDSHZrrbkz_Int 1864 +VADDSHZrrk_Int 1865 +VADDSHZrrkz_Int 1866 +VADDSSZrm 1867 +VADDSSZrm_Int 1868 +VADDSSZrmk_Int 1869 +VADDSSZrmkz_Int 1870 +VADDSSZrr 1871 +VADDSSZrr_Int 1872 +VADDSSZrrb_Int 1873 +VADDSSZrrbk_Int 1874 +VADDSSZrrbkz_Int 1875 +VADDSSZrrk_Int 1876 +VADDSSZrrkz_Int 1877 +VADDSSrm 1878 +VADDSSrm_Int 1879 +VADDSSrr 1880 +VADDSSrr_Int 1881 +VADDSUBPDYrm 1882 +VADDSUBPDYrr 1883 +VADDSUBPDrm 1884 +VADDSUBPDrr 1885 +VADDSUBPSYrm 1886 +VADDSUBPSYrr 1887 +VADDSUBPSrm 1888 +VADDSUBPSrr 1889 +VAESDECLASTYrm 1890 +VAESDECLASTYrr 1891 +VAESDECLASTZ 1892 +VAESDECLASTZrm 1893 +VAESDECLASTZrr 1894 +VAESDECLASTrm 1895 +VAESDECLASTrr 1896 +VAESDECYrm 1897 +VAESDECYrr 1898 +VAESDECZ 1899 +VAESDECZrm 1900 +VAESDECZrr 1901 +VAESDECrm 1902 +VAESDECrr 1903 +VAESENCLASTYrm 1904 +VAESENCLASTYrr 1905 +VAESENCLASTZ 1906 +VAESENCLASTZrm 1907 +VAESENCLASTZrr 1908 +VAESENCLASTrm 1909 +VAESENCLASTrr 1910 +VAESENCYrm 1911 +VAESENCYrr 1912 +VAESENCZ 1913 +VAESENCZrm 1914 +VAESENCZrr 1915 +VAESENCrm 1916 +VAESENCrr 1917 +VAESIMCrm 1918 +VAESIMCrr 1919 +VAESKEYGENASSISTrmi 1920 +VAESKEYGENASSISTrri 1921 +VALIGNDZ 1922 +VALIGNDZrmbi 1923 +VALIGNDZrmbik 1924 +VALIGNDZrmbikz 1925 +VALIGNDZrmi 1926 +VALIGNDZrmik 1927 +VALIGNDZrmikz 1928 +VALIGNDZrri 1929 +VALIGNDZrrik 1930 +VALIGNDZrrikz 1931 +VALIGNQZ 1932 +VALIGNQZrmbi 1933 +VALIGNQZrmbik 1934 +VALIGNQZrmbikz 1935 +VALIGNQZrmi 1936 +VALIGNQZrmik 1937 +VALIGNQZrmikz 1938 +VALIGNQZrri 1939 +VALIGNQZrrik 1940 +VALIGNQZrrikz 1941 +VANDNPDYrm 1942 +VANDNPDYrr 1943 +VANDNPDZ 1944 +VANDNPDZrm 1945 +VANDNPDZrmb 1946 +VANDNPDZrmbk 1947 +VANDNPDZrmbkz 1948 +VANDNPDZrmk 1949 +VANDNPDZrmkz 1950 +VANDNPDZrr 1951 +VANDNPDZrrk 1952 +VANDNPDZrrkz 1953 +VANDNPDrm 1954 +VANDNPDrr 1955 +VANDNPSYrm 1956 +VANDNPSYrr 1957 +VANDNPSZ 1958 +VANDNPSZrm 1959 +VANDNPSZrmb 1960 +VANDNPSZrmbk 1961 +VANDNPSZrmbkz 1962 +VANDNPSZrmk 1963 +VANDNPSZrmkz 1964 +VANDNPSZrr 1965 +VANDNPSZrrk 1966 +VANDNPSZrrkz 1967 +VANDNPSrm 1968 +VANDNPSrr 1969 +VANDPDYrm 1970 +VANDPDYrr 1971 +VANDPDZ 1972 +VANDPDZrm 1973 +VANDPDZrmb 1974 +VANDPDZrmbk 1975 +VANDPDZrmbkz 1976 +VANDPDZrmk 1977 +VANDPDZrmkz 1978 +VANDPDZrr 1979 +VANDPDZrrk 1980 +VANDPDZrrkz 1981 +VANDPDrm 1982 +VANDPDrr 1983 +VANDPSYrm 1984 +VANDPSYrr 1985 +VANDPSZ 1986 +VANDPSZrm 1987 +VANDPSZrmb 1988 +VANDPSZrmbk 1989 +VANDPSZrmbkz 1990 +VANDPSZrmk 1991 +VANDPSZrmkz 1992 +VANDPSZrr 1993 +VANDPSZrrk 1994 +VANDPSZrrkz 1995 +VANDPSrm 1996 +VANDPSrr 1997 +VASTART_SAVE_XMM_REGS 1998 +VBCSTNEBF 1999 +VBCSTNESH 2000 +VBLENDMPDZ 2001 +VBLENDMPDZrm 2002 +VBLENDMPDZrmb 2003 +VBLENDMPDZrmbk 2004 +VBLENDMPDZrmbkz 2005 +VBLENDMPDZrmk 2006 +VBLENDMPDZrmkz 2007 +VBLENDMPDZrr 2008 +VBLENDMPDZrrk 2009 +VBLENDMPDZrrkz 2010 +VBLENDMPSZ 2011 +VBLENDMPSZrm 2012 +VBLENDMPSZrmb 2013 +VBLENDMPSZrmbk 2014 +VBLENDMPSZrmbkz 2015 +VBLENDMPSZrmk 2016 +VBLENDMPSZrmkz 2017 +VBLENDMPSZrr 2018 
+VBLENDMPSZrrk 2019 +VBLENDMPSZrrkz 2020 +VBLENDPDYrmi 2021 +VBLENDPDYrri 2022 +VBLENDPDrmi 2023 +VBLENDPDrri 2024 +VBLENDPSYrmi 2025 +VBLENDPSYrri 2026 +VBLENDPSrmi 2027 +VBLENDPSrri 2028 +VBLENDVPDYrmr 2029 +VBLENDVPDYrrr 2030 +VBLENDVPDrmr 2031 +VBLENDVPDrrr 2032 +VBLENDVPSYrmr 2033 +VBLENDVPSYrrr 2034 +VBLENDVPSrmr 2035 +VBLENDVPSrrr 2036 +VBROADCASTF 2037 +VBROADCASTI 2038 +VBROADCASTSDYrm 2039 +VBROADCASTSDYrr 2040 +VBROADCASTSDZ 2041 +VBROADCASTSDZrm 2042 +VBROADCASTSDZrmk 2043 +VBROADCASTSDZrmkz 2044 +VBROADCASTSDZrr 2045 +VBROADCASTSDZrrk 2046 +VBROADCASTSDZrrkz 2047 +VBROADCASTSSYrm 2048 +VBROADCASTSSYrr 2049 +VBROADCASTSSZ 2050 +VBROADCASTSSZrm 2051 +VBROADCASTSSZrmk 2052 +VBROADCASTSSZrmkz 2053 +VBROADCASTSSZrr 2054 +VBROADCASTSSZrrk 2055 +VBROADCASTSSZrrkz 2056 +VBROADCASTSSrm 2057 +VBROADCASTSSrr 2058 +VCMPBF 2059 +VCMPPDYrmi 2060 +VCMPPDYrri 2061 +VCMPPDZ 2062 +VCMPPDZrmbi 2063 +VCMPPDZrmbik 2064 +VCMPPDZrmi 2065 +VCMPPDZrmik 2066 +VCMPPDZrri 2067 +VCMPPDZrrib 2068 +VCMPPDZrribk 2069 +VCMPPDZrrik 2070 +VCMPPDrmi 2071 +VCMPPDrri 2072 +VCMPPHZ 2073 +VCMPPHZrmbi 2074 +VCMPPHZrmbik 2075 +VCMPPHZrmi 2076 +VCMPPHZrmik 2077 +VCMPPHZrri 2078 +VCMPPHZrrib 2079 +VCMPPHZrribk 2080 +VCMPPHZrrik 2081 +VCMPPSYrmi 2082 +VCMPPSYrri 2083 +VCMPPSZ 2084 +VCMPPSZrmbi 2085 +VCMPPSZrmbik 2086 +VCMPPSZrmi 2087 +VCMPPSZrmik 2088 +VCMPPSZrri 2089 +VCMPPSZrrib 2090 +VCMPPSZrribk 2091 +VCMPPSZrrik 2092 +VCMPPSrmi 2093 +VCMPPSrri 2094 +VCMPSDZrmi 2095 +VCMPSDZrmi_Int 2096 +VCMPSDZrmik_Int 2097 +VCMPSDZrri 2098 +VCMPSDZrri_Int 2099 +VCMPSDZrrib_Int 2100 +VCMPSDZrribk_Int 2101 +VCMPSDZrrik_Int 2102 +VCMPSDrmi 2103 +VCMPSDrmi_Int 2104 +VCMPSDrri 2105 +VCMPSDrri_Int 2106 +VCMPSHZrmi 2107 +VCMPSHZrmi_Int 2108 +VCMPSHZrmik_Int 2109 +VCMPSHZrri 2110 +VCMPSHZrri_Int 2111 +VCMPSHZrrib_Int 2112 +VCMPSHZrribk_Int 2113 +VCMPSHZrrik_Int 2114 +VCMPSSZrmi 2115 +VCMPSSZrmi_Int 2116 +VCMPSSZrmik_Int 2117 +VCMPSSZrri 2118 +VCMPSSZrri_Int 2119 +VCMPSSZrrib_Int 2120 +VCMPSSZrribk_Int 2121 +VCMPSSZrrik_Int 2122 +VCMPSSrmi 2123 +VCMPSSrmi_Int 2124 +VCMPSSrri 2125 +VCMPSSrri_Int 2126 +VCOMISBF 2127 +VCOMISDZrm 2128 +VCOMISDZrm_Int 2129 +VCOMISDZrr 2130 +VCOMISDZrr_Int 2131 +VCOMISDZrrb 2132 +VCOMISDrm 2133 +VCOMISDrm_Int 2134 +VCOMISDrr 2135 +VCOMISDrr_Int 2136 +VCOMISHZrm 2137 +VCOMISHZrm_Int 2138 +VCOMISHZrr 2139 +VCOMISHZrr_Int 2140 +VCOMISHZrrb 2141 +VCOMISSZrm 2142 +VCOMISSZrm_Int 2143 +VCOMISSZrr 2144 +VCOMISSZrr_Int 2145 +VCOMISSZrrb 2146 +VCOMISSrm 2147 +VCOMISSrm_Int 2148 +VCOMISSrr 2149 +VCOMISSrr_Int 2150 +VCOMPRESSPDZ 2151 +VCOMPRESSPDZmr 2152 +VCOMPRESSPDZmrk 2153 +VCOMPRESSPDZrr 2154 +VCOMPRESSPDZrrk 2155 +VCOMPRESSPDZrrkz 2156 +VCOMPRESSPSZ 2157 +VCOMPRESSPSZmr 2158 +VCOMPRESSPSZmrk 2159 +VCOMPRESSPSZrr 2160 +VCOMPRESSPSZrrk 2161 +VCOMPRESSPSZrrkz 2162 +VCOMXSDZrm_Int 2163 +VCOMXSDZrr_Int 2164 +VCOMXSDZrrb_Int 2165 +VCOMXSHZrm_Int 2166 +VCOMXSHZrr_Int 2167 +VCOMXSHZrrb_Int 2168 +VCOMXSSZrm_Int 2169 +VCOMXSSZrr_Int 2170 +VCOMXSSZrrb_Int 2171 +VCVT 2172 +VCVTBF 2173 +VCVTBIASPH 2174 +VCVTDQ 2175 +VCVTHF 2176 +VCVTNE 2177 +VCVTNEEBF 2178 +VCVTNEEPH 2179 +VCVTNEOBF 2180 +VCVTNEOPH 2181 +VCVTNEPS 2182 +VCVTPD 2183 +VCVTPH 2184 +VCVTPS 2185 +VCVTQQ 2186 +VCVTSD 2187 +VCVTSH 2188 +VCVTSI 2189 +VCVTSS 2190 +VCVTTBF 2191 +VCVTTPD 2192 +VCVTTPH 2193 +VCVTTPS 2194 +VCVTTSD 2195 +VCVTTSH 2196 +VCVTTSS 2197 +VCVTUDQ 2198 +VCVTUQQ 2199 +VCVTUSI 2200 +VCVTUW 2201 +VCVTW 2202 +VDBPSADBWZ 2203 +VDBPSADBWZrmi 2204 +VDBPSADBWZrmik 2205 +VDBPSADBWZrmikz 2206 +VDBPSADBWZrri 2207 +VDBPSADBWZrrik 2208 +VDBPSADBWZrrikz 2209 +VDIVBF 
2210 +VDIVPDYrm 2211 +VDIVPDYrr 2212 +VDIVPDZ 2213 +VDIVPDZrm 2214 +VDIVPDZrmb 2215 +VDIVPDZrmbk 2216 +VDIVPDZrmbkz 2217 +VDIVPDZrmk 2218 +VDIVPDZrmkz 2219 +VDIVPDZrr 2220 +VDIVPDZrrb 2221 +VDIVPDZrrbk 2222 +VDIVPDZrrbkz 2223 +VDIVPDZrrk 2224 +VDIVPDZrrkz 2225 +VDIVPDrm 2226 +VDIVPDrr 2227 +VDIVPHZ 2228 +VDIVPHZrm 2229 +VDIVPHZrmb 2230 +VDIVPHZrmbk 2231 +VDIVPHZrmbkz 2232 +VDIVPHZrmk 2233 +VDIVPHZrmkz 2234 +VDIVPHZrr 2235 +VDIVPHZrrb 2236 +VDIVPHZrrbk 2237 +VDIVPHZrrbkz 2238 +VDIVPHZrrk 2239 +VDIVPHZrrkz 2240 +VDIVPSYrm 2241 +VDIVPSYrr 2242 +VDIVPSZ 2243 +VDIVPSZrm 2244 +VDIVPSZrmb 2245 +VDIVPSZrmbk 2246 +VDIVPSZrmbkz 2247 +VDIVPSZrmk 2248 +VDIVPSZrmkz 2249 +VDIVPSZrr 2250 +VDIVPSZrrb 2251 +VDIVPSZrrbk 2252 +VDIVPSZrrbkz 2253 +VDIVPSZrrk 2254 +VDIVPSZrrkz 2255 +VDIVPSrm 2256 +VDIVPSrr 2257 +VDIVSDZrm 2258 +VDIVSDZrm_Int 2259 +VDIVSDZrmk_Int 2260 +VDIVSDZrmkz_Int 2261 +VDIVSDZrr 2262 +VDIVSDZrr_Int 2263 +VDIVSDZrrb_Int 2264 +VDIVSDZrrbk_Int 2265 +VDIVSDZrrbkz_Int 2266 +VDIVSDZrrk_Int 2267 +VDIVSDZrrkz_Int 2268 +VDIVSDrm 2269 +VDIVSDrm_Int 2270 +VDIVSDrr 2271 +VDIVSDrr_Int 2272 +VDIVSHZrm 2273 +VDIVSHZrm_Int 2274 +VDIVSHZrmk_Int 2275 +VDIVSHZrmkz_Int 2276 +VDIVSHZrr 2277 +VDIVSHZrr_Int 2278 +VDIVSHZrrb_Int 2279 +VDIVSHZrrbk_Int 2280 +VDIVSHZrrbkz_Int 2281 +VDIVSHZrrk_Int 2282 +VDIVSHZrrkz_Int 2283 +VDIVSSZrm 2284 +VDIVSSZrm_Int 2285 +VDIVSSZrmk_Int 2286 +VDIVSSZrmkz_Int 2287 +VDIVSSZrr 2288 +VDIVSSZrr_Int 2289 +VDIVSSZrrb_Int 2290 +VDIVSSZrrbk_Int 2291 +VDIVSSZrrbkz_Int 2292 +VDIVSSZrrk_Int 2293 +VDIVSSZrrkz_Int 2294 +VDIVSSrm 2295 +VDIVSSrm_Int 2296 +VDIVSSrr 2297 +VDIVSSrr_Int 2298 +VDPBF 2299 +VDPPDrmi 2300 +VDPPDrri 2301 +VDPPHPSZ 2302 +VDPPHPSZm 2303 +VDPPHPSZmb 2304 +VDPPHPSZmbk 2305 +VDPPHPSZmbkz 2306 +VDPPHPSZmk 2307 +VDPPHPSZmkz 2308 +VDPPHPSZr 2309 +VDPPHPSZrk 2310 +VDPPHPSZrkz 2311 +VDPPSYrmi 2312 +VDPPSYrri 2313 +VDPPSrmi 2314 +VDPPSrri 2315 +VERRm 2316 +VERRr 2317 +VERWm 2318 +VERWr 2319 +VEXP 2320 +VEXPANDPDZ 2321 +VEXPANDPDZrm 2322 +VEXPANDPDZrmk 2323 +VEXPANDPDZrmkz 2324 +VEXPANDPDZrr 2325 +VEXPANDPDZrrk 2326 +VEXPANDPDZrrkz 2327 +VEXPANDPSZ 2328 +VEXPANDPSZrm 2329 +VEXPANDPSZrmk 2330 +VEXPANDPSZrmkz 2331 +VEXPANDPSZrr 2332 +VEXPANDPSZrrk 2333 +VEXPANDPSZrrkz 2334 +VEXTRACTF 2335 +VEXTRACTI 2336 +VEXTRACTPSZmri 2337 +VEXTRACTPSZrri 2338 +VEXTRACTPSmri 2339 +VEXTRACTPSrri 2340 +VFCMADDCPHZ 2341 +VFCMADDCPHZm 2342 +VFCMADDCPHZmb 2343 +VFCMADDCPHZmbk 2344 +VFCMADDCPHZmbkz 2345 +VFCMADDCPHZmk 2346 +VFCMADDCPHZmkz 2347 +VFCMADDCPHZr 2348 +VFCMADDCPHZrb 2349 +VFCMADDCPHZrbk 2350 +VFCMADDCPHZrbkz 2351 +VFCMADDCPHZrk 2352 +VFCMADDCPHZrkz 2353 +VFCMADDCSHZm 2354 +VFCMADDCSHZmk 2355 +VFCMADDCSHZmkz 2356 +VFCMADDCSHZr 2357 +VFCMADDCSHZrb 2358 +VFCMADDCSHZrbk 2359 +VFCMADDCSHZrbkz 2360 +VFCMADDCSHZrk 2361 +VFCMADDCSHZrkz 2362 +VFCMULCPHZ 2363 +VFCMULCPHZrm 2364 +VFCMULCPHZrmb 2365 +VFCMULCPHZrmbk 2366 +VFCMULCPHZrmbkz 2367 +VFCMULCPHZrmk 2368 +VFCMULCPHZrmkz 2369 +VFCMULCPHZrr 2370 +VFCMULCPHZrrb 2371 +VFCMULCPHZrrbk 2372 +VFCMULCPHZrrbkz 2373 +VFCMULCPHZrrk 2374 +VFCMULCPHZrrkz 2375 +VFCMULCSHZrm 2376 +VFCMULCSHZrmk 2377 +VFCMULCSHZrmkz 2378 +VFCMULCSHZrr 2379 +VFCMULCSHZrrb 2380 +VFCMULCSHZrrbk 2381 +VFCMULCSHZrrbkz 2382 +VFCMULCSHZrrk 2383 +VFCMULCSHZrrkz 2384 +VFIXUPIMMPDZ 2385 +VFIXUPIMMPDZrmbi 2386 +VFIXUPIMMPDZrmbik 2387 +VFIXUPIMMPDZrmbikz 2388 +VFIXUPIMMPDZrmi 2389 +VFIXUPIMMPDZrmik 2390 +VFIXUPIMMPDZrmikz 2391 +VFIXUPIMMPDZrri 2392 +VFIXUPIMMPDZrrib 2393 +VFIXUPIMMPDZrribk 2394 +VFIXUPIMMPDZrribkz 2395 +VFIXUPIMMPDZrrik 2396 +VFIXUPIMMPDZrrikz 2397 +VFIXUPIMMPSZ 2398 
+VFIXUPIMMPSZrmbi 2399 +VFIXUPIMMPSZrmbik 2400 +VFIXUPIMMPSZrmbikz 2401 +VFIXUPIMMPSZrmi 2402 +VFIXUPIMMPSZrmik 2403 +VFIXUPIMMPSZrmikz 2404 +VFIXUPIMMPSZrri 2405 +VFIXUPIMMPSZrrib 2406 +VFIXUPIMMPSZrribk 2407 +VFIXUPIMMPSZrribkz 2408 +VFIXUPIMMPSZrrik 2409 +VFIXUPIMMPSZrrikz 2410 +VFIXUPIMMSDZrmi 2411 +VFIXUPIMMSDZrmik 2412 +VFIXUPIMMSDZrmikz 2413 +VFIXUPIMMSDZrri 2414 +VFIXUPIMMSDZrrib 2415 +VFIXUPIMMSDZrribk 2416 +VFIXUPIMMSDZrribkz 2417 +VFIXUPIMMSDZrrik 2418 +VFIXUPIMMSDZrrikz 2419 +VFIXUPIMMSSZrmi 2420 +VFIXUPIMMSSZrmik 2421 +VFIXUPIMMSSZrmikz 2422 +VFIXUPIMMSSZrri 2423 +VFIXUPIMMSSZrrib 2424 +VFIXUPIMMSSZrribk 2425 +VFIXUPIMMSSZrribkz 2426 +VFIXUPIMMSSZrrik 2427 +VFIXUPIMMSSZrrikz 2428 +VFMADD 2429 +VFMADDCPHZ 2430 +VFMADDCPHZm 2431 +VFMADDCPHZmb 2432 +VFMADDCPHZmbk 2433 +VFMADDCPHZmbkz 2434 +VFMADDCPHZmk 2435 +VFMADDCPHZmkz 2436 +VFMADDCPHZr 2437 +VFMADDCPHZrb 2438 +VFMADDCPHZrbk 2439 +VFMADDCPHZrbkz 2440 +VFMADDCPHZrk 2441 +VFMADDCPHZrkz 2442 +VFMADDCSHZm 2443 +VFMADDCSHZmk 2444 +VFMADDCSHZmkz 2445 +VFMADDCSHZr 2446 +VFMADDCSHZrb 2447 +VFMADDCSHZrbk 2448 +VFMADDCSHZrbkz 2449 +VFMADDCSHZrk 2450 +VFMADDCSHZrkz 2451 +VFMADDPD 2452 +VFMADDPS 2453 +VFMADDSD 2454 +VFMADDSS 2455 +VFMADDSUB 2456 +VFMADDSUBPD 2457 +VFMADDSUBPS 2458 +VFMSUB 2459 +VFMSUBADD 2460 +VFMSUBADDPD 2461 +VFMSUBADDPS 2462 +VFMSUBPD 2463 +VFMSUBPS 2464 +VFMSUBSD 2465 +VFMSUBSS 2466 +VFMULCPHZ 2467 +VFMULCPHZrm 2468 +VFMULCPHZrmb 2469 +VFMULCPHZrmbk 2470 +VFMULCPHZrmbkz 2471 +VFMULCPHZrmk 2472 +VFMULCPHZrmkz 2473 +VFMULCPHZrr 2474 +VFMULCPHZrrb 2475 +VFMULCPHZrrbk 2476 +VFMULCPHZrrbkz 2477 +VFMULCPHZrrk 2478 +VFMULCPHZrrkz 2479 +VFMULCSHZrm 2480 +VFMULCSHZrmk 2481 +VFMULCSHZrmkz 2482 +VFMULCSHZrr 2483 +VFMULCSHZrrb 2484 +VFMULCSHZrrbk 2485 +VFMULCSHZrrbkz 2486 +VFMULCSHZrrk 2487 +VFMULCSHZrrkz 2488 +VFNMADD 2489 +VFNMADDPD 2490 +VFNMADDPS 2491 +VFNMADDSD 2492 +VFNMADDSS 2493 +VFNMSUB 2494 +VFNMSUBPD 2495 +VFNMSUBPS 2496 +VFNMSUBSD 2497 +VFNMSUBSS 2498 +VFPCLASSBF 2499 +VFPCLASSPDZ 2500 +VFPCLASSPDZmbi 2501 +VFPCLASSPDZmbik 2502 +VFPCLASSPDZmi 2503 +VFPCLASSPDZmik 2504 +VFPCLASSPDZri 2505 +VFPCLASSPDZrik 2506 +VFPCLASSPHZ 2507 +VFPCLASSPHZmbi 2508 +VFPCLASSPHZmbik 2509 +VFPCLASSPHZmi 2510 +VFPCLASSPHZmik 2511 +VFPCLASSPHZri 2512 +VFPCLASSPHZrik 2513 +VFPCLASSPSZ 2514 +VFPCLASSPSZmbi 2515 +VFPCLASSPSZmbik 2516 +VFPCLASSPSZmi 2517 +VFPCLASSPSZmik 2518 +VFPCLASSPSZri 2519 +VFPCLASSPSZrik 2520 +VFPCLASSSDZmi 2521 +VFPCLASSSDZmik 2522 +VFPCLASSSDZri 2523 +VFPCLASSSDZrik 2524 +VFPCLASSSHZmi 2525 +VFPCLASSSHZmik 2526 +VFPCLASSSHZri 2527 +VFPCLASSSHZrik 2528 +VFPCLASSSSZmi 2529 +VFPCLASSSSZmik 2530 +VFPCLASSSSZri 2531 +VFPCLASSSSZrik 2532 +VFRCZPDYrm 2533 +VFRCZPDYrr 2534 +VFRCZPDrm 2535 +VFRCZPDrr 2536 +VFRCZPSYrm 2537 +VFRCZPSYrr 2538 +VFRCZPSrm 2539 +VFRCZPSrr 2540 +VFRCZSDrm 2541 +VFRCZSDrr 2542 +VFRCZSSrm 2543 +VFRCZSSrr 2544 +VGATHERDPDYrm 2545 +VGATHERDPDZ 2546 +VGATHERDPDZrm 2547 +VGATHERDPDrm 2548 +VGATHERDPSYrm 2549 +VGATHERDPSZ 2550 +VGATHERDPSZrm 2551 +VGATHERDPSrm 2552 +VGATHERPF 2553 +VGATHERQPDYrm 2554 +VGATHERQPDZ 2555 +VGATHERQPDZrm 2556 +VGATHERQPDrm 2557 +VGATHERQPSYrm 2558 +VGATHERQPSZ 2559 +VGATHERQPSZrm 2560 +VGATHERQPSrm 2561 +VGETEXPBF 2562 +VGETEXPPDZ 2563 +VGETEXPPDZm 2564 +VGETEXPPDZmb 2565 +VGETEXPPDZmbk 2566 +VGETEXPPDZmbkz 2567 +VGETEXPPDZmk 2568 +VGETEXPPDZmkz 2569 +VGETEXPPDZr 2570 +VGETEXPPDZrb 2571 +VGETEXPPDZrbk 2572 +VGETEXPPDZrbkz 2573 +VGETEXPPDZrk 2574 +VGETEXPPDZrkz 2575 +VGETEXPPHZ 2576 +VGETEXPPHZm 2577 +VGETEXPPHZmb 2578 +VGETEXPPHZmbk 2579 +VGETEXPPHZmbkz 2580 +VGETEXPPHZmk 2581 
+VGETEXPPHZmkz 2582 +VGETEXPPHZr 2583 +VGETEXPPHZrb 2584 +VGETEXPPHZrbk 2585 +VGETEXPPHZrbkz 2586 +VGETEXPPHZrk 2587 +VGETEXPPHZrkz 2588 +VGETEXPPSZ 2589 +VGETEXPPSZm 2590 +VGETEXPPSZmb 2591 +VGETEXPPSZmbk 2592 +VGETEXPPSZmbkz 2593 +VGETEXPPSZmk 2594 +VGETEXPPSZmkz 2595 +VGETEXPPSZr 2596 +VGETEXPPSZrb 2597 +VGETEXPPSZrbk 2598 +VGETEXPPSZrbkz 2599 +VGETEXPPSZrk 2600 +VGETEXPPSZrkz 2601 +VGETEXPSDZm 2602 +VGETEXPSDZmk 2603 +VGETEXPSDZmkz 2604 +VGETEXPSDZr 2605 +VGETEXPSDZrb 2606 +VGETEXPSDZrbk 2607 +VGETEXPSDZrbkz 2608 +VGETEXPSDZrk 2609 +VGETEXPSDZrkz 2610 +VGETEXPSHZm 2611 +VGETEXPSHZmk 2612 +VGETEXPSHZmkz 2613 +VGETEXPSHZr 2614 +VGETEXPSHZrb 2615 +VGETEXPSHZrbk 2616 +VGETEXPSHZrbkz 2617 +VGETEXPSHZrk 2618 +VGETEXPSHZrkz 2619 +VGETEXPSSZm 2620 +VGETEXPSSZmk 2621 +VGETEXPSSZmkz 2622 +VGETEXPSSZr 2623 +VGETEXPSSZrb 2624 +VGETEXPSSZrbk 2625 +VGETEXPSSZrbkz 2626 +VGETEXPSSZrk 2627 +VGETEXPSSZrkz 2628 +VGETMANTBF 2629 +VGETMANTPDZ 2630 +VGETMANTPDZrmbi 2631 +VGETMANTPDZrmbik 2632 +VGETMANTPDZrmbikz 2633 +VGETMANTPDZrmi 2634 +VGETMANTPDZrmik 2635 +VGETMANTPDZrmikz 2636 +VGETMANTPDZrri 2637 +VGETMANTPDZrrib 2638 +VGETMANTPDZrribk 2639 +VGETMANTPDZrribkz 2640 +VGETMANTPDZrrik 2641 +VGETMANTPDZrrikz 2642 +VGETMANTPHZ 2643 +VGETMANTPHZrmbi 2644 +VGETMANTPHZrmbik 2645 +VGETMANTPHZrmbikz 2646 +VGETMANTPHZrmi 2647 +VGETMANTPHZrmik 2648 +VGETMANTPHZrmikz 2649 +VGETMANTPHZrri 2650 +VGETMANTPHZrrib 2651 +VGETMANTPHZrribk 2652 +VGETMANTPHZrribkz 2653 +VGETMANTPHZrrik 2654 +VGETMANTPHZrrikz 2655 +VGETMANTPSZ 2656 +VGETMANTPSZrmbi 2657 +VGETMANTPSZrmbik 2658 +VGETMANTPSZrmbikz 2659 +VGETMANTPSZrmi 2660 +VGETMANTPSZrmik 2661 +VGETMANTPSZrmikz 2662 +VGETMANTPSZrri 2663 +VGETMANTPSZrrib 2664 +VGETMANTPSZrribk 2665 +VGETMANTPSZrribkz 2666 +VGETMANTPSZrrik 2667 +VGETMANTPSZrrikz 2668 +VGETMANTSDZrmi 2669 +VGETMANTSDZrmik 2670 +VGETMANTSDZrmikz 2671 +VGETMANTSDZrri 2672 +VGETMANTSDZrrib 2673 +VGETMANTSDZrribk 2674 +VGETMANTSDZrribkz 2675 +VGETMANTSDZrrik 2676 +VGETMANTSDZrrikz 2677 +VGETMANTSHZrmi 2678 +VGETMANTSHZrmik 2679 +VGETMANTSHZrmikz 2680 +VGETMANTSHZrri 2681 +VGETMANTSHZrrib 2682 +VGETMANTSHZrribk 2683 +VGETMANTSHZrribkz 2684 +VGETMANTSHZrrik 2685 +VGETMANTSHZrrikz 2686 +VGETMANTSSZrmi 2687 +VGETMANTSSZrmik 2688 +VGETMANTSSZrmikz 2689 +VGETMANTSSZrri 2690 +VGETMANTSSZrrib 2691 +VGETMANTSSZrribk 2692 +VGETMANTSSZrribkz 2693 +VGETMANTSSZrrik 2694 +VGETMANTSSZrrikz 2695 +VGF 2696 +VHADDPDYrm 2697 +VHADDPDYrr 2698 +VHADDPDrm 2699 +VHADDPDrr 2700 +VHADDPSYrm 2701 +VHADDPSYrr 2702 +VHADDPSrm 2703 +VHADDPSrr 2704 +VHSUBPDYrm 2705 +VHSUBPDYrr 2706 +VHSUBPDrm 2707 +VHSUBPDrr 2708 +VHSUBPSYrm 2709 +VHSUBPSYrr 2710 +VHSUBPSrm 2711 +VHSUBPSrr 2712 +VINSERTF 2713 +VINSERTI 2714 +VINSERTPSZrmi 2715 +VINSERTPSZrri 2716 +VINSERTPSrmi 2717 +VINSERTPSrri 2718 +VLDDQUYrm 2719 +VLDDQUrm 2720 +VLDMXCSR 2721 +VMASKMOVDQU 2722 +VMASKMOVPDYmr 2723 +VMASKMOVPDYrm 2724 +VMASKMOVPDmr 2725 +VMASKMOVPDrm 2726 +VMASKMOVPSYmr 2727 +VMASKMOVPSYrm 2728 +VMASKMOVPSmr 2729 +VMASKMOVPSrm 2730 +VMAXBF 2731 +VMAXCPDYrm 2732 +VMAXCPDYrr 2733 +VMAXCPDZ 2734 +VMAXCPDZrm 2735 +VMAXCPDZrmb 2736 +VMAXCPDZrmbk 2737 +VMAXCPDZrmbkz 2738 +VMAXCPDZrmk 2739 +VMAXCPDZrmkz 2740 +VMAXCPDZrr 2741 +VMAXCPDZrrk 2742 +VMAXCPDZrrkz 2743 +VMAXCPDrm 2744 +VMAXCPDrr 2745 +VMAXCPHZ 2746 +VMAXCPHZrm 2747 +VMAXCPHZrmb 2748 +VMAXCPHZrmbk 2749 +VMAXCPHZrmbkz 2750 +VMAXCPHZrmk 2751 +VMAXCPHZrmkz 2752 +VMAXCPHZrr 2753 +VMAXCPHZrrk 2754 +VMAXCPHZrrkz 2755 +VMAXCPSYrm 2756 +VMAXCPSYrr 2757 +VMAXCPSZ 2758 +VMAXCPSZrm 2759 +VMAXCPSZrmb 2760 +VMAXCPSZrmbk 2761 +VMAXCPSZrmbkz 
2762 +VMAXCPSZrmk 2763 +VMAXCPSZrmkz 2764 +VMAXCPSZrr 2765 +VMAXCPSZrrk 2766 +VMAXCPSZrrkz 2767 +VMAXCPSrm 2768 +VMAXCPSrr 2769 +VMAXCSDZrm 2770 +VMAXCSDZrr 2771 +VMAXCSDrm 2772 +VMAXCSDrr 2773 +VMAXCSHZrm 2774 +VMAXCSHZrr 2775 +VMAXCSSZrm 2776 +VMAXCSSZrr 2777 +VMAXCSSrm 2778 +VMAXCSSrr 2779 +VMAXPDYrm 2780 +VMAXPDYrr 2781 +VMAXPDZ 2782 +VMAXPDZrm 2783 +VMAXPDZrmb 2784 +VMAXPDZrmbk 2785 +VMAXPDZrmbkz 2786 +VMAXPDZrmk 2787 +VMAXPDZrmkz 2788 +VMAXPDZrr 2789 +VMAXPDZrrb 2790 +VMAXPDZrrbk 2791 +VMAXPDZrrbkz 2792 +VMAXPDZrrk 2793 +VMAXPDZrrkz 2794 +VMAXPDrm 2795 +VMAXPDrr 2796 +VMAXPHZ 2797 +VMAXPHZrm 2798 +VMAXPHZrmb 2799 +VMAXPHZrmbk 2800 +VMAXPHZrmbkz 2801 +VMAXPHZrmk 2802 +VMAXPHZrmkz 2803 +VMAXPHZrr 2804 +VMAXPHZrrb 2805 +VMAXPHZrrbk 2806 +VMAXPHZrrbkz 2807 +VMAXPHZrrk 2808 +VMAXPHZrrkz 2809 +VMAXPSYrm 2810 +VMAXPSYrr 2811 +VMAXPSZ 2812 +VMAXPSZrm 2813 +VMAXPSZrmb 2814 +VMAXPSZrmbk 2815 +VMAXPSZrmbkz 2816 +VMAXPSZrmk 2817 +VMAXPSZrmkz 2818 +VMAXPSZrr 2819 +VMAXPSZrrb 2820 +VMAXPSZrrbk 2821 +VMAXPSZrrbkz 2822 +VMAXPSZrrk 2823 +VMAXPSZrrkz 2824 +VMAXPSrm 2825 +VMAXPSrr 2826 +VMAXSDZrm 2827 +VMAXSDZrm_Int 2828 +VMAXSDZrmk_Int 2829 +VMAXSDZrmkz_Int 2830 +VMAXSDZrr 2831 +VMAXSDZrr_Int 2832 +VMAXSDZrrb_Int 2833 +VMAXSDZrrbk_Int 2834 +VMAXSDZrrbkz_Int 2835 +VMAXSDZrrk_Int 2836 +VMAXSDZrrkz_Int 2837 +VMAXSDrm 2838 +VMAXSDrm_Int 2839 +VMAXSDrr 2840 +VMAXSDrr_Int 2841 +VMAXSHZrm 2842 +VMAXSHZrm_Int 2843 +VMAXSHZrmk_Int 2844 +VMAXSHZrmkz_Int 2845 +VMAXSHZrr 2846 +VMAXSHZrr_Int 2847 +VMAXSHZrrb_Int 2848 +VMAXSHZrrbk_Int 2849 +VMAXSHZrrbkz_Int 2850 +VMAXSHZrrk_Int 2851 +VMAXSHZrrkz_Int 2852 +VMAXSSZrm 2853 +VMAXSSZrm_Int 2854 +VMAXSSZrmk_Int 2855 +VMAXSSZrmkz_Int 2856 +VMAXSSZrr 2857 +VMAXSSZrr_Int 2858 +VMAXSSZrrb_Int 2859 +VMAXSSZrrbk_Int 2860 +VMAXSSZrrbkz_Int 2861 +VMAXSSZrrk_Int 2862 +VMAXSSZrrkz_Int 2863 +VMAXSSrm 2864 +VMAXSSrm_Int 2865 +VMAXSSrr 2866 +VMAXSSrr_Int 2867 +VMCALL 2868 +VMCLEARm 2869 +VMFUNC 2870 +VMINBF 2871 +VMINCPDYrm 2872 +VMINCPDYrr 2873 +VMINCPDZ 2874 +VMINCPDZrm 2875 +VMINCPDZrmb 2876 +VMINCPDZrmbk 2877 +VMINCPDZrmbkz 2878 +VMINCPDZrmk 2879 +VMINCPDZrmkz 2880 +VMINCPDZrr 2881 +VMINCPDZrrk 2882 +VMINCPDZrrkz 2883 +VMINCPDrm 2884 +VMINCPDrr 2885 +VMINCPHZ 2886 +VMINCPHZrm 2887 +VMINCPHZrmb 2888 +VMINCPHZrmbk 2889 +VMINCPHZrmbkz 2890 +VMINCPHZrmk 2891 +VMINCPHZrmkz 2892 +VMINCPHZrr 2893 +VMINCPHZrrk 2894 +VMINCPHZrrkz 2895 +VMINCPSYrm 2896 +VMINCPSYrr 2897 +VMINCPSZ 2898 +VMINCPSZrm 2899 +VMINCPSZrmb 2900 +VMINCPSZrmbk 2901 +VMINCPSZrmbkz 2902 +VMINCPSZrmk 2903 +VMINCPSZrmkz 2904 +VMINCPSZrr 2905 +VMINCPSZrrk 2906 +VMINCPSZrrkz 2907 +VMINCPSrm 2908 +VMINCPSrr 2909 +VMINCSDZrm 2910 +VMINCSDZrr 2911 +VMINCSDrm 2912 +VMINCSDrr 2913 +VMINCSHZrm 2914 +VMINCSHZrr 2915 +VMINCSSZrm 2916 +VMINCSSZrr 2917 +VMINCSSrm 2918 +VMINCSSrr 2919 +VMINMAXBF 2920 +VMINMAXPDZ 2921 +VMINMAXPDZrmbi 2922 +VMINMAXPDZrmbik 2923 +VMINMAXPDZrmbikz 2924 +VMINMAXPDZrmi 2925 +VMINMAXPDZrmik 2926 +VMINMAXPDZrmikz 2927 +VMINMAXPDZrri 2928 +VMINMAXPDZrrib 2929 +VMINMAXPDZrribk 2930 +VMINMAXPDZrribkz 2931 +VMINMAXPDZrrik 2932 +VMINMAXPDZrrikz 2933 +VMINMAXPHZ 2934 +VMINMAXPHZrmbi 2935 +VMINMAXPHZrmbik 2936 +VMINMAXPHZrmbikz 2937 +VMINMAXPHZrmi 2938 +VMINMAXPHZrmik 2939 +VMINMAXPHZrmikz 2940 +VMINMAXPHZrri 2941 +VMINMAXPHZrrib 2942 +VMINMAXPHZrribk 2943 +VMINMAXPHZrribkz 2944 +VMINMAXPHZrrik 2945 +VMINMAXPHZrrikz 2946 +VMINMAXPSZ 2947 +VMINMAXPSZrmbi 2948 +VMINMAXPSZrmbik 2949 +VMINMAXPSZrmbikz 2950 +VMINMAXPSZrmi 2951 +VMINMAXPSZrmik 2952 +VMINMAXPSZrmikz 2953 +VMINMAXPSZrri 2954 +VMINMAXPSZrrib 2955 
+VMINMAXPSZrribk 2956 +VMINMAXPSZrribkz 2957 +VMINMAXPSZrrik 2958 +VMINMAXPSZrrikz 2959 +VMINMAXSDrmi 2960 +VMINMAXSDrmi_Int 2961 +VMINMAXSDrmik_Int 2962 +VMINMAXSDrmikz_Int 2963 +VMINMAXSDrri 2964 +VMINMAXSDrri_Int 2965 +VMINMAXSDrrib_Int 2966 +VMINMAXSDrribk_Int 2967 +VMINMAXSDrribkz_Int 2968 +VMINMAXSDrrik_Int 2969 +VMINMAXSDrrikz_Int 2970 +VMINMAXSHrmi 2971 +VMINMAXSHrmi_Int 2972 +VMINMAXSHrmik_Int 2973 +VMINMAXSHrmikz_Int 2974 +VMINMAXSHrri 2975 +VMINMAXSHrri_Int 2976 +VMINMAXSHrrib_Int 2977 +VMINMAXSHrribk_Int 2978 +VMINMAXSHrribkz_Int 2979 +VMINMAXSHrrik_Int 2980 +VMINMAXSHrrikz_Int 2981 +VMINMAXSSrmi 2982 +VMINMAXSSrmi_Int 2983 +VMINMAXSSrmik_Int 2984 +VMINMAXSSrmikz_Int 2985 +VMINMAXSSrri 2986 +VMINMAXSSrri_Int 2987 +VMINMAXSSrrib_Int 2988 +VMINMAXSSrribk_Int 2989 +VMINMAXSSrribkz_Int 2990 +VMINMAXSSrrik_Int 2991 +VMINMAXSSrrikz_Int 2992 +VMINPDYrm 2993 +VMINPDYrr 2994 +VMINPDZ 2995 +VMINPDZrm 2996 +VMINPDZrmb 2997 +VMINPDZrmbk 2998 +VMINPDZrmbkz 2999 +VMINPDZrmk 3000 +VMINPDZrmkz 3001 +VMINPDZrr 3002 +VMINPDZrrb 3003 +VMINPDZrrbk 3004 +VMINPDZrrbkz 3005 +VMINPDZrrk 3006 +VMINPDZrrkz 3007 +VMINPDrm 3008 +VMINPDrr 3009 +VMINPHZ 3010 +VMINPHZrm 3011 +VMINPHZrmb 3012 +VMINPHZrmbk 3013 +VMINPHZrmbkz 3014 +VMINPHZrmk 3015 +VMINPHZrmkz 3016 +VMINPHZrr 3017 +VMINPHZrrb 3018 +VMINPHZrrbk 3019 +VMINPHZrrbkz 3020 +VMINPHZrrk 3021 +VMINPHZrrkz 3022 +VMINPSYrm 3023 +VMINPSYrr 3024 +VMINPSZ 3025 +VMINPSZrm 3026 +VMINPSZrmb 3027 +VMINPSZrmbk 3028 +VMINPSZrmbkz 3029 +VMINPSZrmk 3030 +VMINPSZrmkz 3031 +VMINPSZrr 3032 +VMINPSZrrb 3033 +VMINPSZrrbk 3034 +VMINPSZrrbkz 3035 +VMINPSZrrk 3036 +VMINPSZrrkz 3037 +VMINPSrm 3038 +VMINPSrr 3039 +VMINSDZrm 3040 +VMINSDZrm_Int 3041 +VMINSDZrmk_Int 3042 +VMINSDZrmkz_Int 3043 +VMINSDZrr 3044 +VMINSDZrr_Int 3045 +VMINSDZrrb_Int 3046 +VMINSDZrrbk_Int 3047 +VMINSDZrrbkz_Int 3048 +VMINSDZrrk_Int 3049 +VMINSDZrrkz_Int 3050 +VMINSDrm 3051 +VMINSDrm_Int 3052 +VMINSDrr 3053 +VMINSDrr_Int 3054 +VMINSHZrm 3055 +VMINSHZrm_Int 3056 +VMINSHZrmk_Int 3057 +VMINSHZrmkz_Int 3058 +VMINSHZrr 3059 +VMINSHZrr_Int 3060 +VMINSHZrrb_Int 3061 +VMINSHZrrbk_Int 3062 +VMINSHZrrbkz_Int 3063 +VMINSHZrrk_Int 3064 +VMINSHZrrkz_Int 3065 +VMINSSZrm 3066 +VMINSSZrm_Int 3067 +VMINSSZrmk_Int 3068 +VMINSSZrmkz_Int 3069 +VMINSSZrr 3070 +VMINSSZrr_Int 3071 +VMINSSZrrb_Int 3072 +VMINSSZrrbk_Int 3073 +VMINSSZrrbkz_Int 3074 +VMINSSZrrk_Int 3075 +VMINSSZrrkz_Int 3076 +VMINSSrm 3077 +VMINSSrm_Int 3078 +VMINSSrr 3079 +VMINSSrr_Int 3080 +VMLAUNCH 3081 +VMLOAD 3082 +VMMCALL 3083 +VMOV 3084 +VMOVAPDYmr 3085 +VMOVAPDYrm 3086 +VMOVAPDYrr 3087 +VMOVAPDYrr_REV 3088 +VMOVAPDZ 3089 +VMOVAPDZmr 3090 +VMOVAPDZmrk 3091 +VMOVAPDZrm 3092 +VMOVAPDZrmk 3093 +VMOVAPDZrmkz 3094 +VMOVAPDZrr 3095 +VMOVAPDZrr_REV 3096 +VMOVAPDZrrk 3097 +VMOVAPDZrrk_REV 3098 +VMOVAPDZrrkz 3099 +VMOVAPDZrrkz_REV 3100 +VMOVAPDmr 3101 +VMOVAPDrm 3102 +VMOVAPDrr 3103 +VMOVAPDrr_REV 3104 +VMOVAPSYmr 3105 +VMOVAPSYrm 3106 +VMOVAPSYrr 3107 +VMOVAPSYrr_REV 3108 +VMOVAPSZ 3109 +VMOVAPSZmr 3110 +VMOVAPSZmrk 3111 +VMOVAPSZrm 3112 +VMOVAPSZrmk 3113 +VMOVAPSZrmkz 3114 +VMOVAPSZrr 3115 +VMOVAPSZrr_REV 3116 +VMOVAPSZrrk 3117 +VMOVAPSZrrk_REV 3118 +VMOVAPSZrrkz 3119 +VMOVAPSZrrkz_REV 3120 +VMOVAPSmr 3121 +VMOVAPSrm 3122 +VMOVAPSrr 3123 +VMOVAPSrr_REV 3124 +VMOVDDUPYrm 3125 +VMOVDDUPYrr 3126 +VMOVDDUPZ 3127 +VMOVDDUPZrm 3128 +VMOVDDUPZrmk 3129 +VMOVDDUPZrmkz 3130 +VMOVDDUPZrr 3131 +VMOVDDUPZrrk 3132 +VMOVDDUPZrrkz 3133 +VMOVDDUPrm 3134 +VMOVDDUPrr 3135 +VMOVDI 3136 +VMOVDQA 3137 +VMOVDQAYmr 3138 +VMOVDQAYrm 3139 +VMOVDQAYrr 3140 +VMOVDQAYrr_REV 3141 +VMOVDQAmr 
3142 +VMOVDQArm 3143 +VMOVDQArr 3144 +VMOVDQArr_REV 3145 +VMOVDQU 3146 +VMOVDQUYmr 3147 +VMOVDQUYrm 3148 +VMOVDQUYrr 3149 +VMOVDQUYrr_REV 3150 +VMOVDQUmr 3151 +VMOVDQUrm 3152 +VMOVDQUrr 3153 +VMOVDQUrr_REV 3154 +VMOVHLPSZrr 3155 +VMOVHLPSrr 3156 +VMOVHPDZ 3157 +VMOVHPDmr 3158 +VMOVHPDrm 3159 +VMOVHPSZ 3160 +VMOVHPSmr 3161 +VMOVHPSrm 3162 +VMOVLHPSZrr 3163 +VMOVLHPSrr 3164 +VMOVLPDZ 3165 +VMOVLPDmr 3166 +VMOVLPDrm 3167 +VMOVLPSZ 3168 +VMOVLPSmr 3169 +VMOVLPSrm 3170 +VMOVMSKPDYrr 3171 +VMOVMSKPDrr 3172 +VMOVMSKPSYrr 3173 +VMOVMSKPSrr 3174 +VMOVNTDQAYrm 3175 +VMOVNTDQAZ 3176 +VMOVNTDQAZrm 3177 +VMOVNTDQArm 3178 +VMOVNTDQYmr 3179 +VMOVNTDQZ 3180 +VMOVNTDQZmr 3181 +VMOVNTDQmr 3182 +VMOVNTPDYmr 3183 +VMOVNTPDZ 3184 +VMOVNTPDZmr 3185 +VMOVNTPDmr 3186 +VMOVNTPSYmr 3187 +VMOVNTPSZ 3188 +VMOVNTPSZmr 3189 +VMOVNTPSmr 3190 +VMOVPDI 3191 +VMOVPQI 3192 +VMOVPQIto 3193 +VMOVQI 3194 +VMOVRSBZ 3195 +VMOVRSBZm 3196 +VMOVRSBZmk 3197 +VMOVRSBZmkz 3198 +VMOVRSDZ 3199 +VMOVRSDZm 3200 +VMOVRSDZmk 3201 +VMOVRSDZmkz 3202 +VMOVRSQZ 3203 +VMOVRSQZm 3204 +VMOVRSQZmk 3205 +VMOVRSQZmkz 3206 +VMOVRSWZ 3207 +VMOVRSWZm 3208 +VMOVRSWZmk 3209 +VMOVRSWZmkz 3210 +VMOVSDZmr 3211 +VMOVSDZmrk 3212 +VMOVSDZrm 3213 +VMOVSDZrm_alt 3214 +VMOVSDZrmk 3215 +VMOVSDZrmkz 3216 +VMOVSDZrr 3217 +VMOVSDZrr_REV 3218 +VMOVSDZrrk 3219 +VMOVSDZrrk_REV 3220 +VMOVSDZrrkz 3221 +VMOVSDZrrkz_REV 3222 +VMOVSDmr 3223 +VMOVSDrm 3224 +VMOVSDrm_alt 3225 +VMOVSDrr 3226 +VMOVSDrr_REV 3227 +VMOVSDto 3228 +VMOVSH 3229 +VMOVSHDUPYrm 3230 +VMOVSHDUPYrr 3231 +VMOVSHDUPZ 3232 +VMOVSHDUPZrm 3233 +VMOVSHDUPZrmk 3234 +VMOVSHDUPZrmkz 3235 +VMOVSHDUPZrr 3236 +VMOVSHDUPZrrk 3237 +VMOVSHDUPZrrkz 3238 +VMOVSHDUPrm 3239 +VMOVSHDUPrr 3240 +VMOVSHZmr 3241 +VMOVSHZmrk 3242 +VMOVSHZrm 3243 +VMOVSHZrm_alt 3244 +VMOVSHZrmk 3245 +VMOVSHZrmkz 3246 +VMOVSHZrr 3247 +VMOVSHZrr_REV 3248 +VMOVSHZrrk 3249 +VMOVSHZrrk_REV 3250 +VMOVSHZrrkz 3251 +VMOVSHZrrkz_REV 3252 +VMOVSHtoW 3253 +VMOVSLDUPYrm 3254 +VMOVSLDUPYrr 3255 +VMOVSLDUPZ 3256 +VMOVSLDUPZrm 3257 +VMOVSLDUPZrmk 3258 +VMOVSLDUPZrmkz 3259 +VMOVSLDUPZrr 3260 +VMOVSLDUPZrrk 3261 +VMOVSLDUPZrrkz 3262 +VMOVSLDUPrm 3263 +VMOVSLDUPrr 3264 +VMOVSS 3265 +VMOVSSZmr 3266 +VMOVSSZmrk 3267 +VMOVSSZrm 3268 +VMOVSSZrm_alt 3269 +VMOVSSZrmk 3270 +VMOVSSZrmkz 3271 +VMOVSSZrr 3272 +VMOVSSZrr_REV 3273 +VMOVSSZrrk 3274 +VMOVSSZrrk_REV 3275 +VMOVSSZrrkz 3276 +VMOVSSZrrkz_REV 3277 +VMOVSSmr 3278 +VMOVSSrm 3279 +VMOVSSrm_alt 3280 +VMOVSSrr 3281 +VMOVSSrr_REV 3282 +VMOVUPDYmr 3283 +VMOVUPDYrm 3284 +VMOVUPDYrr 3285 +VMOVUPDYrr_REV 3286 +VMOVUPDZ 3287 +VMOVUPDZmr 3288 +VMOVUPDZmrk 3289 +VMOVUPDZrm 3290 +VMOVUPDZrmk 3291 +VMOVUPDZrmkz 3292 +VMOVUPDZrr 3293 +VMOVUPDZrr_REV 3294 +VMOVUPDZrrk 3295 +VMOVUPDZrrk_REV 3296 +VMOVUPDZrrkz 3297 +VMOVUPDZrrkz_REV 3298 +VMOVUPDmr 3299 +VMOVUPDrm 3300 +VMOVUPDrr 3301 +VMOVUPDrr_REV 3302 +VMOVUPSYmr 3303 +VMOVUPSYrm 3304 +VMOVUPSYrr 3305 +VMOVUPSYrr_REV 3306 +VMOVUPSZ 3307 +VMOVUPSZmr 3308 +VMOVUPSZmrk 3309 +VMOVUPSZrm 3310 +VMOVUPSZrmk 3311 +VMOVUPSZrmkz 3312 +VMOVUPSZrr 3313 +VMOVUPSZrr_REV 3314 +VMOVUPSZrrk 3315 +VMOVUPSZrrk_REV 3316 +VMOVUPSZrrkz 3317 +VMOVUPSZrrkz_REV 3318 +VMOVUPSmr 3319 +VMOVUPSrm 3320 +VMOVUPSrr 3321 +VMOVUPSrr_REV 3322 +VMOVW 3323 +VMOVWmr 3324 +VMOVWrm 3325 +VMOVZPDILo 3326 +VMOVZPQILo 3327 +VMOVZPWILo 3328 +VMPSADBWYrmi 3329 +VMPSADBWYrri 3330 +VMPSADBWZ 3331 +VMPSADBWZrmi 3332 +VMPSADBWZrmik 3333 +VMPSADBWZrmikz 3334 +VMPSADBWZrri 3335 +VMPSADBWZrrik 3336 +VMPSADBWZrrikz 3337 +VMPSADBWrmi 3338 +VMPSADBWrri 3339 +VMPTRLDm 3340 +VMPTRSTm 3341 +VMREAD 3342 +VMRESUME 3343 +VMRUN 3344 +VMSAVE 3345 
+VMULBF 3346 +VMULPDYrm 3347 +VMULPDYrr 3348 +VMULPDZ 3349 +VMULPDZrm 3350 +VMULPDZrmb 3351 +VMULPDZrmbk 3352 +VMULPDZrmbkz 3353 +VMULPDZrmk 3354 +VMULPDZrmkz 3355 +VMULPDZrr 3356 +VMULPDZrrb 3357 +VMULPDZrrbk 3358 +VMULPDZrrbkz 3359 +VMULPDZrrk 3360 +VMULPDZrrkz 3361 +VMULPDrm 3362 +VMULPDrr 3363 +VMULPHZ 3364 +VMULPHZrm 3365 +VMULPHZrmb 3366 +VMULPHZrmbk 3367 +VMULPHZrmbkz 3368 +VMULPHZrmk 3369 +VMULPHZrmkz 3370 +VMULPHZrr 3371 +VMULPHZrrb 3372 +VMULPHZrrbk 3373 +VMULPHZrrbkz 3374 +VMULPHZrrk 3375 +VMULPHZrrkz 3376 +VMULPSYrm 3377 +VMULPSYrr 3378 +VMULPSZ 3379 +VMULPSZrm 3380 +VMULPSZrmb 3381 +VMULPSZrmbk 3382 +VMULPSZrmbkz 3383 +VMULPSZrmk 3384 +VMULPSZrmkz 3385 +VMULPSZrr 3386 +VMULPSZrrb 3387 +VMULPSZrrbk 3388 +VMULPSZrrbkz 3389 +VMULPSZrrk 3390 +VMULPSZrrkz 3391 +VMULPSrm 3392 +VMULPSrr 3393 +VMULSDZrm 3394 +VMULSDZrm_Int 3395 +VMULSDZrmk_Int 3396 +VMULSDZrmkz_Int 3397 +VMULSDZrr 3398 +VMULSDZrr_Int 3399 +VMULSDZrrb_Int 3400 +VMULSDZrrbk_Int 3401 +VMULSDZrrbkz_Int 3402 +VMULSDZrrk_Int 3403 +VMULSDZrrkz_Int 3404 +VMULSDrm 3405 +VMULSDrm_Int 3406 +VMULSDrr 3407 +VMULSDrr_Int 3408 +VMULSHZrm 3409 +VMULSHZrm_Int 3410 +VMULSHZrmk_Int 3411 +VMULSHZrmkz_Int 3412 +VMULSHZrr 3413 +VMULSHZrr_Int 3414 +VMULSHZrrb_Int 3415 +VMULSHZrrbk_Int 3416 +VMULSHZrrbkz_Int 3417 +VMULSHZrrk_Int 3418 +VMULSHZrrkz_Int 3419 +VMULSSZrm 3420 +VMULSSZrm_Int 3421 +VMULSSZrmk_Int 3422 +VMULSSZrmkz_Int 3423 +VMULSSZrr 3424 +VMULSSZrr_Int 3425 +VMULSSZrrb_Int 3426 +VMULSSZrrbk_Int 3427 +VMULSSZrrbkz_Int 3428 +VMULSSZrrk_Int 3429 +VMULSSZrrkz_Int 3430 +VMULSSrm 3431 +VMULSSrm_Int 3432 +VMULSSrr 3433 +VMULSSrr_Int 3434 +VMWRITE 3435 +VMXOFF 3436 +VMXON 3437 +VORPDYrm 3438 +VORPDYrr 3439 +VORPDZ 3440 +VORPDZrm 3441 +VORPDZrmb 3442 +VORPDZrmbk 3443 +VORPDZrmbkz 3444 +VORPDZrmk 3445 +VORPDZrmkz 3446 +VORPDZrr 3447 +VORPDZrrk 3448 +VORPDZrrkz 3449 +VORPDrm 3450 +VORPDrr 3451 +VORPSYrm 3452 +VORPSYrr 3453 +VORPSZ 3454 +VORPSZrm 3455 +VORPSZrmb 3456 +VORPSZrmbk 3457 +VORPSZrmbkz 3458 +VORPSZrmk 3459 +VORPSZrmkz 3460 +VORPSZrr 3461 +VORPSZrrk 3462 +VORPSZrrkz 3463 +VORPSrm 3464 +VORPSrr 3465 +VP 3466 +VPABSBYrm 3467 +VPABSBYrr 3468 +VPABSBZ 3469 +VPABSBZrm 3470 +VPABSBZrmk 3471 +VPABSBZrmkz 3472 +VPABSBZrr 3473 +VPABSBZrrk 3474 +VPABSBZrrkz 3475 +VPABSBrm 3476 +VPABSBrr 3477 +VPABSDYrm 3478 +VPABSDYrr 3479 +VPABSDZ 3480 +VPABSDZrm 3481 +VPABSDZrmb 3482 +VPABSDZrmbk 3483 +VPABSDZrmbkz 3484 +VPABSDZrmk 3485 +VPABSDZrmkz 3486 +VPABSDZrr 3487 +VPABSDZrrk 3488 +VPABSDZrrkz 3489 +VPABSDrm 3490 +VPABSDrr 3491 +VPABSQZ 3492 +VPABSQZrm 3493 +VPABSQZrmb 3494 +VPABSQZrmbk 3495 +VPABSQZrmbkz 3496 +VPABSQZrmk 3497 +VPABSQZrmkz 3498 +VPABSQZrr 3499 +VPABSQZrrk 3500 +VPABSQZrrkz 3501 +VPABSWYrm 3502 +VPABSWYrr 3503 +VPABSWZ 3504 +VPABSWZrm 3505 +VPABSWZrmk 3506 +VPABSWZrmkz 3507 +VPABSWZrr 3508 +VPABSWZrrk 3509 +VPABSWZrrkz 3510 +VPABSWrm 3511 +VPABSWrr 3512 +VPACKSSDWYrm 3513 +VPACKSSDWYrr 3514 +VPACKSSDWZ 3515 +VPACKSSDWZrm 3516 +VPACKSSDWZrmb 3517 +VPACKSSDWZrmbk 3518 +VPACKSSDWZrmbkz 3519 +VPACKSSDWZrmk 3520 +VPACKSSDWZrmkz 3521 +VPACKSSDWZrr 3522 +VPACKSSDWZrrk 3523 +VPACKSSDWZrrkz 3524 +VPACKSSDWrm 3525 +VPACKSSDWrr 3526 +VPACKSSWBYrm 3527 +VPACKSSWBYrr 3528 +VPACKSSWBZ 3529 +VPACKSSWBZrm 3530 +VPACKSSWBZrmk 3531 +VPACKSSWBZrmkz 3532 +VPACKSSWBZrr 3533 +VPACKSSWBZrrk 3534 +VPACKSSWBZrrkz 3535 +VPACKSSWBrm 3536 +VPACKSSWBrr 3537 +VPACKUSDWYrm 3538 +VPACKUSDWYrr 3539 +VPACKUSDWZ 3540 +VPACKUSDWZrm 3541 +VPACKUSDWZrmb 3542 +VPACKUSDWZrmbk 3543 +VPACKUSDWZrmbkz 3544 +VPACKUSDWZrmk 3545 +VPACKUSDWZrmkz 3546 +VPACKUSDWZrr 3547 
+VPACKUSDWZrrk 3548 +VPACKUSDWZrrkz 3549 +VPACKUSDWrm 3550 +VPACKUSDWrr 3551 +VPACKUSWBYrm 3552 +VPACKUSWBYrr 3553 +VPACKUSWBZ 3554 +VPACKUSWBZrm 3555 +VPACKUSWBZrmk 3556 +VPACKUSWBZrmkz 3557 +VPACKUSWBZrr 3558 +VPACKUSWBZrrk 3559 +VPACKUSWBZrrkz 3560 +VPACKUSWBrm 3561 +VPACKUSWBrr 3562 +VPADDBYrm 3563 +VPADDBYrr 3564 +VPADDBZ 3565 +VPADDBZrm 3566 +VPADDBZrmk 3567 +VPADDBZrmkz 3568 +VPADDBZrr 3569 +VPADDBZrrk 3570 +VPADDBZrrkz 3571 +VPADDBrm 3572 +VPADDBrr 3573 +VPADDDYrm 3574 +VPADDDYrr 3575 +VPADDDZ 3576 +VPADDDZrm 3577 +VPADDDZrmb 3578 +VPADDDZrmbk 3579 +VPADDDZrmbkz 3580 +VPADDDZrmk 3581 +VPADDDZrmkz 3582 +VPADDDZrr 3583 +VPADDDZrrk 3584 +VPADDDZrrkz 3585 +VPADDDrm 3586 +VPADDDrr 3587 +VPADDQYrm 3588 +VPADDQYrr 3589 +VPADDQZ 3590 +VPADDQZrm 3591 +VPADDQZrmb 3592 +VPADDQZrmbk 3593 +VPADDQZrmbkz 3594 +VPADDQZrmk 3595 +VPADDQZrmkz 3596 +VPADDQZrr 3597 +VPADDQZrrk 3598 +VPADDQZrrkz 3599 +VPADDQrm 3600 +VPADDQrr 3601 +VPADDSBYrm 3602 +VPADDSBYrr 3603 +VPADDSBZ 3604 +VPADDSBZrm 3605 +VPADDSBZrmk 3606 +VPADDSBZrmkz 3607 +VPADDSBZrr 3608 +VPADDSBZrrk 3609 +VPADDSBZrrkz 3610 +VPADDSBrm 3611 +VPADDSBrr 3612 +VPADDSWYrm 3613 +VPADDSWYrr 3614 +VPADDSWZ 3615 +VPADDSWZrm 3616 +VPADDSWZrmk 3617 +VPADDSWZrmkz 3618 +VPADDSWZrr 3619 +VPADDSWZrrk 3620 +VPADDSWZrrkz 3621 +VPADDSWrm 3622 +VPADDSWrr 3623 +VPADDUSBYrm 3624 +VPADDUSBYrr 3625 +VPADDUSBZ 3626 +VPADDUSBZrm 3627 +VPADDUSBZrmk 3628 +VPADDUSBZrmkz 3629 +VPADDUSBZrr 3630 +VPADDUSBZrrk 3631 +VPADDUSBZrrkz 3632 +VPADDUSBrm 3633 +VPADDUSBrr 3634 +VPADDUSWYrm 3635 +VPADDUSWYrr 3636 +VPADDUSWZ 3637 +VPADDUSWZrm 3638 +VPADDUSWZrmk 3639 +VPADDUSWZrmkz 3640 +VPADDUSWZrr 3641 +VPADDUSWZrrk 3642 +VPADDUSWZrrkz 3643 +VPADDUSWrm 3644 +VPADDUSWrr 3645 +VPADDWYrm 3646 +VPADDWYrr 3647 +VPADDWZ 3648 +VPADDWZrm 3649 +VPADDWZrmk 3650 +VPADDWZrmkz 3651 +VPADDWZrr 3652 +VPADDWZrrk 3653 +VPADDWZrrkz 3654 +VPADDWrm 3655 +VPADDWrr 3656 +VPALIGNRYrmi 3657 +VPALIGNRYrri 3658 +VPALIGNRZ 3659 +VPALIGNRZrmi 3660 +VPALIGNRZrmik 3661 +VPALIGNRZrmikz 3662 +VPALIGNRZrri 3663 +VPALIGNRZrrik 3664 +VPALIGNRZrrikz 3665 +VPALIGNRrmi 3666 +VPALIGNRrri 3667 +VPANDDZ 3668 +VPANDDZrm 3669 +VPANDDZrmb 3670 +VPANDDZrmbk 3671 +VPANDDZrmbkz 3672 +VPANDDZrmk 3673 +VPANDDZrmkz 3674 +VPANDDZrr 3675 +VPANDDZrrk 3676 +VPANDDZrrkz 3677 +VPANDNDZ 3678 +VPANDNDZrm 3679 +VPANDNDZrmb 3680 +VPANDNDZrmbk 3681 +VPANDNDZrmbkz 3682 +VPANDNDZrmk 3683 +VPANDNDZrmkz 3684 +VPANDNDZrr 3685 +VPANDNDZrrk 3686 +VPANDNDZrrkz 3687 +VPANDNQZ 3688 +VPANDNQZrm 3689 +VPANDNQZrmb 3690 +VPANDNQZrmbk 3691 +VPANDNQZrmbkz 3692 +VPANDNQZrmk 3693 +VPANDNQZrmkz 3694 +VPANDNQZrr 3695 +VPANDNQZrrk 3696 +VPANDNQZrrkz 3697 +VPANDNYrm 3698 +VPANDNYrr 3699 +VPANDNrm 3700 +VPANDNrr 3701 +VPANDQZ 3702 +VPANDQZrm 3703 +VPANDQZrmb 3704 +VPANDQZrmbk 3705 +VPANDQZrmbkz 3706 +VPANDQZrmk 3707 +VPANDQZrmkz 3708 +VPANDQZrr 3709 +VPANDQZrrk 3710 +VPANDQZrrkz 3711 +VPANDYrm 3712 +VPANDYrr 3713 +VPANDrm 3714 +VPANDrr 3715 +VPAVGBYrm 3716 +VPAVGBYrr 3717 +VPAVGBZ 3718 +VPAVGBZrm 3719 +VPAVGBZrmk 3720 +VPAVGBZrmkz 3721 +VPAVGBZrr 3722 +VPAVGBZrrk 3723 +VPAVGBZrrkz 3724 +VPAVGBrm 3725 +VPAVGBrr 3726 +VPAVGWYrm 3727 +VPAVGWYrr 3728 +VPAVGWZ 3729 +VPAVGWZrm 3730 +VPAVGWZrmk 3731 +VPAVGWZrmkz 3732 +VPAVGWZrr 3733 +VPAVGWZrrk 3734 +VPAVGWZrrkz 3735 +VPAVGWrm 3736 +VPAVGWrr 3737 +VPBLENDDYrmi 3738 +VPBLENDDYrri 3739 +VPBLENDDrmi 3740 +VPBLENDDrri 3741 +VPBLENDMBZ 3742 +VPBLENDMBZrm 3743 +VPBLENDMBZrmk 3744 +VPBLENDMBZrmkz 3745 +VPBLENDMBZrr 3746 +VPBLENDMBZrrk 3747 +VPBLENDMBZrrkz 3748 +VPBLENDMDZ 3749 +VPBLENDMDZrm 3750 +VPBLENDMDZrmb 3751 +VPBLENDMDZrmbk 
3752 +VPBLENDMDZrmbkz 3753 +VPBLENDMDZrmk 3754 +VPBLENDMDZrmkz 3755 +VPBLENDMDZrr 3756 +VPBLENDMDZrrk 3757 +VPBLENDMDZrrkz 3758 +VPBLENDMQZ 3759 +VPBLENDMQZrm 3760 +VPBLENDMQZrmb 3761 +VPBLENDMQZrmbk 3762 +VPBLENDMQZrmbkz 3763 +VPBLENDMQZrmk 3764 +VPBLENDMQZrmkz 3765 +VPBLENDMQZrr 3766 +VPBLENDMQZrrk 3767 +VPBLENDMQZrrkz 3768 +VPBLENDMWZ 3769 +VPBLENDMWZrm 3770 +VPBLENDMWZrmk 3771 +VPBLENDMWZrmkz 3772 +VPBLENDMWZrr 3773 +VPBLENDMWZrrk 3774 +VPBLENDMWZrrkz 3775 +VPBLENDVBYrmr 3776 +VPBLENDVBYrrr 3777 +VPBLENDVBrmr 3778 +VPBLENDVBrrr 3779 +VPBLENDWYrmi 3780 +VPBLENDWYrri 3781 +VPBLENDWrmi 3782 +VPBLENDWrri 3783 +VPBROADCASTBYrm 3784 +VPBROADCASTBYrr 3785 +VPBROADCASTBZ 3786 +VPBROADCASTBZrm 3787 +VPBROADCASTBZrmk 3788 +VPBROADCASTBZrmkz 3789 +VPBROADCASTBZrr 3790 +VPBROADCASTBZrrk 3791 +VPBROADCASTBZrrkz 3792 +VPBROADCASTBrZ 3793 +VPBROADCASTBrZrr 3794 +VPBROADCASTBrZrrk 3795 +VPBROADCASTBrZrrkz 3796 +VPBROADCASTBrm 3797 +VPBROADCASTBrr 3798 +VPBROADCASTDYrm 3799 +VPBROADCASTDYrr 3800 +VPBROADCASTDZ 3801 +VPBROADCASTDZrm 3802 +VPBROADCASTDZrmk 3803 +VPBROADCASTDZrmkz 3804 +VPBROADCASTDZrr 3805 +VPBROADCASTDZrrk 3806 +VPBROADCASTDZrrkz 3807 +VPBROADCASTDrZ 3808 +VPBROADCASTDrZrr 3809 +VPBROADCASTDrZrrk 3810 +VPBROADCASTDrZrrkz 3811 +VPBROADCASTDrm 3812 +VPBROADCASTDrr 3813 +VPBROADCASTMB 3814 +VPBROADCASTMW 3815 +VPBROADCASTQYrm 3816 +VPBROADCASTQYrr 3817 +VPBROADCASTQZ 3818 +VPBROADCASTQZrm 3819 +VPBROADCASTQZrmk 3820 +VPBROADCASTQZrmkz 3821 +VPBROADCASTQZrr 3822 +VPBROADCASTQZrrk 3823 +VPBROADCASTQZrrkz 3824 +VPBROADCASTQrZ 3825 +VPBROADCASTQrZrr 3826 +VPBROADCASTQrZrrk 3827 +VPBROADCASTQrZrrkz 3828 +VPBROADCASTQrm 3829 +VPBROADCASTQrr 3830 +VPBROADCASTWYrm 3831 +VPBROADCASTWYrr 3832 +VPBROADCASTWZ 3833 +VPBROADCASTWZrm 3834 +VPBROADCASTWZrmk 3835 +VPBROADCASTWZrmkz 3836 +VPBROADCASTWZrr 3837 +VPBROADCASTWZrrk 3838 +VPBROADCASTWZrrkz 3839 +VPBROADCASTWrZ 3840 +VPBROADCASTWrZrr 3841 +VPBROADCASTWrZrrk 3842 +VPBROADCASTWrZrrkz 3843 +VPBROADCASTWrm 3844 +VPBROADCASTWrr 3845 +VPCLMULQDQYrmi 3846 +VPCLMULQDQYrri 3847 +VPCLMULQDQZ 3848 +VPCLMULQDQZrmi 3849 +VPCLMULQDQZrri 3850 +VPCLMULQDQrmi 3851 +VPCLMULQDQrri 3852 +VPCMOVYrmr 3853 +VPCMOVYrrm 3854 +VPCMOVYrrr 3855 +VPCMOVYrrr_REV 3856 +VPCMOVrmr 3857 +VPCMOVrrm 3858 +VPCMOVrrr 3859 +VPCMOVrrr_REV 3860 +VPCMPBZ 3861 +VPCMPBZrmi 3862 +VPCMPBZrmik 3863 +VPCMPBZrri 3864 +VPCMPBZrrik 3865 +VPCMPDZ 3866 +VPCMPDZrmbi 3867 +VPCMPDZrmbik 3868 +VPCMPDZrmi 3869 +VPCMPDZrmik 3870 +VPCMPDZrri 3871 +VPCMPDZrrik 3872 +VPCMPEQBYrm 3873 +VPCMPEQBYrr 3874 +VPCMPEQBZ 3875 +VPCMPEQBZrm 3876 +VPCMPEQBZrmk 3877 +VPCMPEQBZrr 3878 +VPCMPEQBZrrk 3879 +VPCMPEQBrm 3880 +VPCMPEQBrr 3881 +VPCMPEQDYrm 3882 +VPCMPEQDYrr 3883 +VPCMPEQDZ 3884 +VPCMPEQDZrm 3885 +VPCMPEQDZrmb 3886 +VPCMPEQDZrmbk 3887 +VPCMPEQDZrmk 3888 +VPCMPEQDZrr 3889 +VPCMPEQDZrrk 3890 +VPCMPEQDrm 3891 +VPCMPEQDrr 3892 +VPCMPEQQYrm 3893 +VPCMPEQQYrr 3894 +VPCMPEQQZ 3895 +VPCMPEQQZrm 3896 +VPCMPEQQZrmb 3897 +VPCMPEQQZrmbk 3898 +VPCMPEQQZrmk 3899 +VPCMPEQQZrr 3900 +VPCMPEQQZrrk 3901 +VPCMPEQQrm 3902 +VPCMPEQQrr 3903 +VPCMPEQWYrm 3904 +VPCMPEQWYrr 3905 +VPCMPEQWZ 3906 +VPCMPEQWZrm 3907 +VPCMPEQWZrmk 3908 +VPCMPEQWZrr 3909 +VPCMPEQWZrrk 3910 +VPCMPEQWrm 3911 +VPCMPEQWrr 3912 +VPCMPESTRIrmi 3913 +VPCMPESTRIrri 3914 +VPCMPESTRMrmi 3915 +VPCMPESTRMrri 3916 +VPCMPGTBYrm 3917 +VPCMPGTBYrr 3918 +VPCMPGTBZ 3919 +VPCMPGTBZrm 3920 +VPCMPGTBZrmk 3921 +VPCMPGTBZrr 3922 +VPCMPGTBZrrk 3923 +VPCMPGTBrm 3924 +VPCMPGTBrr 3925 +VPCMPGTDYrm 3926 +VPCMPGTDYrr 3927 +VPCMPGTDZ 3928 +VPCMPGTDZrm 3929 +VPCMPGTDZrmb 3930 +VPCMPGTDZrmbk 
3931 +VPCMPGTDZrmk 3932 +VPCMPGTDZrr 3933 +VPCMPGTDZrrk 3934 +VPCMPGTDrm 3935 +VPCMPGTDrr 3936 +VPCMPGTQYrm 3937 +VPCMPGTQYrr 3938 +VPCMPGTQZ 3939 +VPCMPGTQZrm 3940 +VPCMPGTQZrmb 3941 +VPCMPGTQZrmbk 3942 +VPCMPGTQZrmk 3943 +VPCMPGTQZrr 3944 +VPCMPGTQZrrk 3945 +VPCMPGTQrm 3946 +VPCMPGTQrr 3947 +VPCMPGTWYrm 3948 +VPCMPGTWYrr 3949 +VPCMPGTWZ 3950 +VPCMPGTWZrm 3951 +VPCMPGTWZrmk 3952 +VPCMPGTWZrr 3953 +VPCMPGTWZrrk 3954 +VPCMPGTWrm 3955 +VPCMPGTWrr 3956 +VPCMPISTRIrmi 3957 +VPCMPISTRIrri 3958 +VPCMPISTRMrmi 3959 +VPCMPISTRMrri 3960 +VPCMPQZ 3961 +VPCMPQZrmbi 3962 +VPCMPQZrmbik 3963 +VPCMPQZrmi 3964 +VPCMPQZrmik 3965 +VPCMPQZrri 3966 +VPCMPQZrrik 3967 +VPCMPUBZ 3968 +VPCMPUBZrmi 3969 +VPCMPUBZrmik 3970 +VPCMPUBZrri 3971 +VPCMPUBZrrik 3972 +VPCMPUDZ 3973 +VPCMPUDZrmbi 3974 +VPCMPUDZrmbik 3975 +VPCMPUDZrmi 3976 +VPCMPUDZrmik 3977 +VPCMPUDZrri 3978 +VPCMPUDZrrik 3979 +VPCMPUQZ 3980 +VPCMPUQZrmbi 3981 +VPCMPUQZrmbik 3982 +VPCMPUQZrmi 3983 +VPCMPUQZrmik 3984 +VPCMPUQZrri 3985 +VPCMPUQZrrik 3986 +VPCMPUWZ 3987 +VPCMPUWZrmi 3988 +VPCMPUWZrmik 3989 +VPCMPUWZrri 3990 +VPCMPUWZrrik 3991 +VPCMPWZ 3992 +VPCMPWZrmi 3993 +VPCMPWZrmik 3994 +VPCMPWZrri 3995 +VPCMPWZrrik 3996 +VPCOMBmi 3997 +VPCOMBri 3998 +VPCOMDmi 3999 +VPCOMDri 4000 +VPCOMPRESSBZ 4001 +VPCOMPRESSBZmr 4002 +VPCOMPRESSBZmrk 4003 +VPCOMPRESSBZrr 4004 +VPCOMPRESSBZrrk 4005 +VPCOMPRESSBZrrkz 4006 +VPCOMPRESSDZ 4007 +VPCOMPRESSDZmr 4008 +VPCOMPRESSDZmrk 4009 +VPCOMPRESSDZrr 4010 +VPCOMPRESSDZrrk 4011 +VPCOMPRESSDZrrkz 4012 +VPCOMPRESSQZ 4013 +VPCOMPRESSQZmr 4014 +VPCOMPRESSQZmrk 4015 +VPCOMPRESSQZrr 4016 +VPCOMPRESSQZrrk 4017 +VPCOMPRESSQZrrkz 4018 +VPCOMPRESSWZ 4019 +VPCOMPRESSWZmr 4020 +VPCOMPRESSWZmrk 4021 +VPCOMPRESSWZrr 4022 +VPCOMPRESSWZrrk 4023 +VPCOMPRESSWZrrkz 4024 +VPCOMQmi 4025 +VPCOMQri 4026 +VPCOMUBmi 4027 +VPCOMUBri 4028 +VPCOMUDmi 4029 +VPCOMUDri 4030 +VPCOMUQmi 4031 +VPCOMUQri 4032 +VPCOMUWmi 4033 +VPCOMUWri 4034 +VPCOMWmi 4035 +VPCOMWri 4036 +VPCONFLICTDZ 4037 +VPCONFLICTDZrm 4038 +VPCONFLICTDZrmb 4039 +VPCONFLICTDZrmbk 4040 +VPCONFLICTDZrmbkz 4041 +VPCONFLICTDZrmk 4042 +VPCONFLICTDZrmkz 4043 +VPCONFLICTDZrr 4044 +VPCONFLICTDZrrk 4045 +VPCONFLICTDZrrkz 4046 +VPCONFLICTQZ 4047 +VPCONFLICTQZrm 4048 +VPCONFLICTQZrmb 4049 +VPCONFLICTQZrmbk 4050 +VPCONFLICTQZrmbkz 4051 +VPCONFLICTQZrmk 4052 +VPCONFLICTQZrmkz 4053 +VPCONFLICTQZrr 4054 +VPCONFLICTQZrrk 4055 +VPCONFLICTQZrrkz 4056 +VPDPBSSDSYrm 4057 +VPDPBSSDSYrr 4058 +VPDPBSSDSZ 4059 +VPDPBSSDSZrm 4060 +VPDPBSSDSZrmb 4061 +VPDPBSSDSZrmbk 4062 +VPDPBSSDSZrmbkz 4063 +VPDPBSSDSZrmk 4064 +VPDPBSSDSZrmkz 4065 +VPDPBSSDSZrr 4066 +VPDPBSSDSZrrk 4067 +VPDPBSSDSZrrkz 4068 +VPDPBSSDSrm 4069 +VPDPBSSDSrr 4070 +VPDPBSSDYrm 4071 +VPDPBSSDYrr 4072 +VPDPBSSDZ 4073 +VPDPBSSDZrm 4074 +VPDPBSSDZrmb 4075 +VPDPBSSDZrmbk 4076 +VPDPBSSDZrmbkz 4077 +VPDPBSSDZrmk 4078 +VPDPBSSDZrmkz 4079 +VPDPBSSDZrr 4080 +VPDPBSSDZrrk 4081 +VPDPBSSDZrrkz 4082 +VPDPBSSDrm 4083 +VPDPBSSDrr 4084 +VPDPBSUDSYrm 4085 +VPDPBSUDSYrr 4086 +VPDPBSUDSZ 4087 +VPDPBSUDSZrm 4088 +VPDPBSUDSZrmb 4089 +VPDPBSUDSZrmbk 4090 +VPDPBSUDSZrmbkz 4091 +VPDPBSUDSZrmk 4092 +VPDPBSUDSZrmkz 4093 +VPDPBSUDSZrr 4094 +VPDPBSUDSZrrk 4095 +VPDPBSUDSZrrkz 4096 +VPDPBSUDSrm 4097 +VPDPBSUDSrr 4098 +VPDPBSUDYrm 4099 +VPDPBSUDYrr 4100 +VPDPBSUDZ 4101 +VPDPBSUDZrm 4102 +VPDPBSUDZrmb 4103 +VPDPBSUDZrmbk 4104 +VPDPBSUDZrmbkz 4105 +VPDPBSUDZrmk 4106 +VPDPBSUDZrmkz 4107 +VPDPBSUDZrr 4108 +VPDPBSUDZrrk 4109 +VPDPBSUDZrrkz 4110 +VPDPBSUDrm 4111 +VPDPBSUDrr 4112 +VPDPBUSDSYrm 4113 +VPDPBUSDSYrr 4114 +VPDPBUSDSZ 4115 +VPDPBUSDSZrm 4116 +VPDPBUSDSZrmb 4117 +VPDPBUSDSZrmbk 
4118 +VPDPBUSDSZrmbkz 4119 +VPDPBUSDSZrmk 4120 +VPDPBUSDSZrmkz 4121 +VPDPBUSDSZrr 4122 +VPDPBUSDSZrrk 4123 +VPDPBUSDSZrrkz 4124 +VPDPBUSDSrm 4125 +VPDPBUSDSrr 4126 +VPDPBUSDYrm 4127 +VPDPBUSDYrr 4128 +VPDPBUSDZ 4129 +VPDPBUSDZrm 4130 +VPDPBUSDZrmb 4131 +VPDPBUSDZrmbk 4132 +VPDPBUSDZrmbkz 4133 +VPDPBUSDZrmk 4134 +VPDPBUSDZrmkz 4135 +VPDPBUSDZrr 4136 +VPDPBUSDZrrk 4137 +VPDPBUSDZrrkz 4138 +VPDPBUSDrm 4139 +VPDPBUSDrr 4140 +VPDPBUUDSYrm 4141 +VPDPBUUDSYrr 4142 +VPDPBUUDSZ 4143 +VPDPBUUDSZrm 4144 +VPDPBUUDSZrmb 4145 +VPDPBUUDSZrmbk 4146 +VPDPBUUDSZrmbkz 4147 +VPDPBUUDSZrmk 4148 +VPDPBUUDSZrmkz 4149 +VPDPBUUDSZrr 4150 +VPDPBUUDSZrrk 4151 +VPDPBUUDSZrrkz 4152 +VPDPBUUDSrm 4153 +VPDPBUUDSrr 4154 +VPDPBUUDYrm 4155 +VPDPBUUDYrr 4156 +VPDPBUUDZ 4157 +VPDPBUUDZrm 4158 +VPDPBUUDZrmb 4159 +VPDPBUUDZrmbk 4160 +VPDPBUUDZrmbkz 4161 +VPDPBUUDZrmk 4162 +VPDPBUUDZrmkz 4163 +VPDPBUUDZrr 4164 +VPDPBUUDZrrk 4165 +VPDPBUUDZrrkz 4166 +VPDPBUUDrm 4167 +VPDPBUUDrr 4168 +VPDPWSSDSYrm 4169 +VPDPWSSDSYrr 4170 +VPDPWSSDSZ 4171 +VPDPWSSDSZrm 4172 +VPDPWSSDSZrmb 4173 +VPDPWSSDSZrmbk 4174 +VPDPWSSDSZrmbkz 4175 +VPDPWSSDSZrmk 4176 +VPDPWSSDSZrmkz 4177 +VPDPWSSDSZrr 4178 +VPDPWSSDSZrrk 4179 +VPDPWSSDSZrrkz 4180 +VPDPWSSDSrm 4181 +VPDPWSSDSrr 4182 +VPDPWSSDYrm 4183 +VPDPWSSDYrr 4184 +VPDPWSSDZ 4185 +VPDPWSSDZrm 4186 +VPDPWSSDZrmb 4187 +VPDPWSSDZrmbk 4188 +VPDPWSSDZrmbkz 4189 +VPDPWSSDZrmk 4190 +VPDPWSSDZrmkz 4191 +VPDPWSSDZrr 4192 +VPDPWSSDZrrk 4193 +VPDPWSSDZrrkz 4194 +VPDPWSSDrm 4195 +VPDPWSSDrr 4196 +VPDPWSUDSYrm 4197 +VPDPWSUDSYrr 4198 +VPDPWSUDSZ 4199 +VPDPWSUDSZrm 4200 +VPDPWSUDSZrmb 4201 +VPDPWSUDSZrmbk 4202 +VPDPWSUDSZrmbkz 4203 +VPDPWSUDSZrmk 4204 +VPDPWSUDSZrmkz 4205 +VPDPWSUDSZrr 4206 +VPDPWSUDSZrrk 4207 +VPDPWSUDSZrrkz 4208 +VPDPWSUDSrm 4209 +VPDPWSUDSrr 4210 +VPDPWSUDYrm 4211 +VPDPWSUDYrr 4212 +VPDPWSUDZ 4213 +VPDPWSUDZrm 4214 +VPDPWSUDZrmb 4215 +VPDPWSUDZrmbk 4216 +VPDPWSUDZrmbkz 4217 +VPDPWSUDZrmk 4218 +VPDPWSUDZrmkz 4219 +VPDPWSUDZrr 4220 +VPDPWSUDZrrk 4221 +VPDPWSUDZrrkz 4222 +VPDPWSUDrm 4223 +VPDPWSUDrr 4224 +VPDPWUSDSYrm 4225 +VPDPWUSDSYrr 4226 +VPDPWUSDSZ 4227 +VPDPWUSDSZrm 4228 +VPDPWUSDSZrmb 4229 +VPDPWUSDSZrmbk 4230 +VPDPWUSDSZrmbkz 4231 +VPDPWUSDSZrmk 4232 +VPDPWUSDSZrmkz 4233 +VPDPWUSDSZrr 4234 +VPDPWUSDSZrrk 4235 +VPDPWUSDSZrrkz 4236 +VPDPWUSDSrm 4237 +VPDPWUSDSrr 4238 +VPDPWUSDYrm 4239 +VPDPWUSDYrr 4240 +VPDPWUSDZ 4241 +VPDPWUSDZrm 4242 +VPDPWUSDZrmb 4243 +VPDPWUSDZrmbk 4244 +VPDPWUSDZrmbkz 4245 +VPDPWUSDZrmk 4246 +VPDPWUSDZrmkz 4247 +VPDPWUSDZrr 4248 +VPDPWUSDZrrk 4249 +VPDPWUSDZrrkz 4250 +VPDPWUSDrm 4251 +VPDPWUSDrr 4252 +VPDPWUUDSYrm 4253 +VPDPWUUDSYrr 4254 +VPDPWUUDSZ 4255 +VPDPWUUDSZrm 4256 +VPDPWUUDSZrmb 4257 +VPDPWUUDSZrmbk 4258 +VPDPWUUDSZrmbkz 4259 +VPDPWUUDSZrmk 4260 +VPDPWUUDSZrmkz 4261 +VPDPWUUDSZrr 4262 +VPDPWUUDSZrrk 4263 +VPDPWUUDSZrrkz 4264 +VPDPWUUDSrm 4265 +VPDPWUUDSrr 4266 +VPDPWUUDYrm 4267 +VPDPWUUDYrr 4268 +VPDPWUUDZ 4269 +VPDPWUUDZrm 4270 +VPDPWUUDZrmb 4271 +VPDPWUUDZrmbk 4272 +VPDPWUUDZrmbkz 4273 +VPDPWUUDZrmk 4274 +VPDPWUUDZrmkz 4275 +VPDPWUUDZrr 4276 +VPDPWUUDZrrk 4277 +VPDPWUUDZrrkz 4278 +VPDPWUUDrm 4279 +VPDPWUUDrr 4280 +VPERM 4281 +VPERMBZ 4282 +VPERMBZrm 4283 +VPERMBZrmk 4284 +VPERMBZrmkz 4285 +VPERMBZrr 4286 +VPERMBZrrk 4287 +VPERMBZrrkz 4288 +VPERMDYrm 4289 +VPERMDYrr 4290 +VPERMDZ 4291 +VPERMDZrm 4292 +VPERMDZrmb 4293 +VPERMDZrmbk 4294 +VPERMDZrmbkz 4295 +VPERMDZrmk 4296 +VPERMDZrmkz 4297 +VPERMDZrr 4298 +VPERMDZrrk 4299 +VPERMDZrrkz 4300 +VPERMI 4301 +VPERMIL 4302 +VPERMILPDYmi 4303 +VPERMILPDYri 4304 +VPERMILPDYrm 4305 +VPERMILPDYrr 4306 +VPERMILPDZ 4307 
+VPERMILPDZmbi 4308 +VPERMILPDZmbik 4309 +VPERMILPDZmbikz 4310 +VPERMILPDZmi 4311 +VPERMILPDZmik 4312 +VPERMILPDZmikz 4313 +VPERMILPDZri 4314 +VPERMILPDZrik 4315 +VPERMILPDZrikz 4316 +VPERMILPDZrm 4317 +VPERMILPDZrmb 4318 +VPERMILPDZrmbk 4319 +VPERMILPDZrmbkz 4320 +VPERMILPDZrmk 4321 +VPERMILPDZrmkz 4322 +VPERMILPDZrr 4323 +VPERMILPDZrrk 4324 +VPERMILPDZrrkz 4325 +VPERMILPDmi 4326 +VPERMILPDri 4327 +VPERMILPDrm 4328 +VPERMILPDrr 4329 +VPERMILPSYmi 4330 +VPERMILPSYri 4331 +VPERMILPSYrm 4332 +VPERMILPSYrr 4333 +VPERMILPSZ 4334 +VPERMILPSZmbi 4335 +VPERMILPSZmbik 4336 +VPERMILPSZmbikz 4337 +VPERMILPSZmi 4338 +VPERMILPSZmik 4339 +VPERMILPSZmikz 4340 +VPERMILPSZri 4341 +VPERMILPSZrik 4342 +VPERMILPSZrikz 4343 +VPERMILPSZrm 4344 +VPERMILPSZrmb 4345 +VPERMILPSZrmbk 4346 +VPERMILPSZrmbkz 4347 +VPERMILPSZrmk 4348 +VPERMILPSZrmkz 4349 +VPERMILPSZrr 4350 +VPERMILPSZrrk 4351 +VPERMILPSZrrkz 4352 +VPERMILPSmi 4353 +VPERMILPSri 4354 +VPERMILPSrm 4355 +VPERMILPSrr 4356 +VPERMPDYmi 4357 +VPERMPDYri 4358 +VPERMPDZ 4359 +VPERMPDZmbi 4360 +VPERMPDZmbik 4361 +VPERMPDZmbikz 4362 +VPERMPDZmi 4363 +VPERMPDZmik 4364 +VPERMPDZmikz 4365 +VPERMPDZri 4366 +VPERMPDZrik 4367 +VPERMPDZrikz 4368 +VPERMPDZrm 4369 +VPERMPDZrmb 4370 +VPERMPDZrmbk 4371 +VPERMPDZrmbkz 4372 +VPERMPDZrmk 4373 +VPERMPDZrmkz 4374 +VPERMPDZrr 4375 +VPERMPDZrrk 4376 +VPERMPDZrrkz 4377 +VPERMPSYrm 4378 +VPERMPSYrr 4379 +VPERMPSZ 4380 +VPERMPSZrm 4381 +VPERMPSZrmb 4382 +VPERMPSZrmbk 4383 +VPERMPSZrmbkz 4384 +VPERMPSZrmk 4385 +VPERMPSZrmkz 4386 +VPERMPSZrr 4387 +VPERMPSZrrk 4388 +VPERMPSZrrkz 4389 +VPERMQYmi 4390 +VPERMQYri 4391 +VPERMQZ 4392 +VPERMQZmbi 4393 +VPERMQZmbik 4394 +VPERMQZmbikz 4395 +VPERMQZmi 4396 +VPERMQZmik 4397 +VPERMQZmikz 4398 +VPERMQZri 4399 +VPERMQZrik 4400 +VPERMQZrikz 4401 +VPERMQZrm 4402 +VPERMQZrmb 4403 +VPERMQZrmbk 4404 +VPERMQZrmbkz 4405 +VPERMQZrmk 4406 +VPERMQZrmkz 4407 +VPERMQZrr 4408 +VPERMQZrrk 4409 +VPERMQZrrkz 4410 +VPERMT 4411 +VPERMWZ 4412 +VPERMWZrm 4413 +VPERMWZrmk 4414 +VPERMWZrmkz 4415 +VPERMWZrr 4416 +VPERMWZrrk 4417 +VPERMWZrrkz 4418 +VPEXPANDBZ 4419 +VPEXPANDBZrm 4420 +VPEXPANDBZrmk 4421 +VPEXPANDBZrmkz 4422 +VPEXPANDBZrr 4423 +VPEXPANDBZrrk 4424 +VPEXPANDBZrrkz 4425 +VPEXPANDDZ 4426 +VPEXPANDDZrm 4427 +VPEXPANDDZrmk 4428 +VPEXPANDDZrmkz 4429 +VPEXPANDDZrr 4430 +VPEXPANDDZrrk 4431 +VPEXPANDDZrrkz 4432 +VPEXPANDQZ 4433 +VPEXPANDQZrm 4434 +VPEXPANDQZrmk 4435 +VPEXPANDQZrmkz 4436 +VPEXPANDQZrr 4437 +VPEXPANDQZrrk 4438 +VPEXPANDQZrrkz 4439 +VPEXPANDWZ 4440 +VPEXPANDWZrm 4441 +VPEXPANDWZrmk 4442 +VPEXPANDWZrmkz 4443 +VPEXPANDWZrr 4444 +VPEXPANDWZrrk 4445 +VPEXPANDWZrrkz 4446 +VPEXTRBZmri 4447 +VPEXTRBZrri 4448 +VPEXTRBmri 4449 +VPEXTRBrri 4450 +VPEXTRDZmri 4451 +VPEXTRDZrri 4452 +VPEXTRDmri 4453 +VPEXTRDrri 4454 +VPEXTRQZmri 4455 +VPEXTRQZrri 4456 +VPEXTRQmri 4457 +VPEXTRQrri 4458 +VPEXTRWZmri 4459 +VPEXTRWZrri 4460 +VPEXTRWZrri_REV 4461 +VPEXTRWmri 4462 +VPEXTRWrri 4463 +VPEXTRWrri_REV 4464 +VPGATHERDDYrm 4465 +VPGATHERDDZ 4466 +VPGATHERDDZrm 4467 +VPGATHERDDrm 4468 +VPGATHERDQYrm 4469 +VPGATHERDQZ 4470 +VPGATHERDQZrm 4471 +VPGATHERDQrm 4472 +VPGATHERQDYrm 4473 +VPGATHERQDZ 4474 +VPGATHERQDZrm 4475 +VPGATHERQDrm 4476 +VPGATHERQQYrm 4477 +VPGATHERQQZ 4478 +VPGATHERQQZrm 4479 +VPGATHERQQrm 4480 +VPHADDBDrm 4481 +VPHADDBDrr 4482 +VPHADDBQrm 4483 +VPHADDBQrr 4484 +VPHADDBWrm 4485 +VPHADDBWrr 4486 +VPHADDDQrm 4487 +VPHADDDQrr 4488 +VPHADDDYrm 4489 +VPHADDDYrr 4490 +VPHADDDrm 4491 +VPHADDDrr 4492 +VPHADDSWYrm 4493 +VPHADDSWYrr 4494 +VPHADDSWrm 4495 +VPHADDSWrr 4496 +VPHADDUBDrm 4497 +VPHADDUBDrr 4498 +VPHADDUBQrm 4499 
+VPHADDUBQrr 4500 +VPHADDUBWrm 4501 +VPHADDUBWrr 4502 +VPHADDUDQrm 4503 +VPHADDUDQrr 4504 +VPHADDUWDrm 4505 +VPHADDUWDrr 4506 +VPHADDUWQrm 4507 +VPHADDUWQrr 4508 +VPHADDWDrm 4509 +VPHADDWDrr 4510 +VPHADDWQrm 4511 +VPHADDWQrr 4512 +VPHADDWYrm 4513 +VPHADDWYrr 4514 +VPHADDWrm 4515 +VPHADDWrr 4516 +VPHMINPOSUWrm 4517 +VPHMINPOSUWrr 4518 +VPHSUBBWrm 4519 +VPHSUBBWrr 4520 +VPHSUBDQrm 4521 +VPHSUBDQrr 4522 +VPHSUBDYrm 4523 +VPHSUBDYrr 4524 +VPHSUBDrm 4525 +VPHSUBDrr 4526 +VPHSUBSWYrm 4527 +VPHSUBSWYrr 4528 +VPHSUBSWrm 4529 +VPHSUBSWrr 4530 +VPHSUBWDrm 4531 +VPHSUBWDrr 4532 +VPHSUBWYrm 4533 +VPHSUBWYrr 4534 +VPHSUBWrm 4535 +VPHSUBWrr 4536 +VPINSRBZrmi 4537 +VPINSRBZrri 4538 +VPINSRBrmi 4539 +VPINSRBrri 4540 +VPINSRDZrmi 4541 +VPINSRDZrri 4542 +VPINSRDrmi 4543 +VPINSRDrri 4544 +VPINSRQZrmi 4545 +VPINSRQZrri 4546 +VPINSRQrmi 4547 +VPINSRQrri 4548 +VPINSRWZrmi 4549 +VPINSRWZrri 4550 +VPINSRWrmi 4551 +VPINSRWrri 4552 +VPLZCNTDZ 4553 +VPLZCNTDZrm 4554 +VPLZCNTDZrmb 4555 +VPLZCNTDZrmbk 4556 +VPLZCNTDZrmbkz 4557 +VPLZCNTDZrmk 4558 +VPLZCNTDZrmkz 4559 +VPLZCNTDZrr 4560 +VPLZCNTDZrrk 4561 +VPLZCNTDZrrkz 4562 +VPLZCNTQZ 4563 +VPLZCNTQZrm 4564 +VPLZCNTQZrmb 4565 +VPLZCNTQZrmbk 4566 +VPLZCNTQZrmbkz 4567 +VPLZCNTQZrmk 4568 +VPLZCNTQZrmkz 4569 +VPLZCNTQZrr 4570 +VPLZCNTQZrrk 4571 +VPLZCNTQZrrkz 4572 +VPMACSDDrm 4573 +VPMACSDDrr 4574 +VPMACSDQHrm 4575 +VPMACSDQHrr 4576 +VPMACSDQLrm 4577 +VPMACSDQLrr 4578 +VPMACSSDDrm 4579 +VPMACSSDDrr 4580 +VPMACSSDQHrm 4581 +VPMACSSDQHrr 4582 +VPMACSSDQLrm 4583 +VPMACSSDQLrr 4584 +VPMACSSWDrm 4585 +VPMACSSWDrr 4586 +VPMACSSWWrm 4587 +VPMACSSWWrr 4588 +VPMACSWDrm 4589 +VPMACSWDrr 4590 +VPMACSWWrm 4591 +VPMACSWWrr 4592 +VPMADCSSWDrm 4593 +VPMADCSSWDrr 4594 +VPMADCSWDrm 4595 +VPMADCSWDrr 4596 +VPMADD 4597 +VPMADDUBSWYrm 4598 +VPMADDUBSWYrr 4599 +VPMADDUBSWZ 4600 +VPMADDUBSWZrm 4601 +VPMADDUBSWZrmk 4602 +VPMADDUBSWZrmkz 4603 +VPMADDUBSWZrr 4604 +VPMADDUBSWZrrk 4605 +VPMADDUBSWZrrkz 4606 +VPMADDUBSWrm 4607 +VPMADDUBSWrr 4608 +VPMADDWDYrm 4609 +VPMADDWDYrr 4610 +VPMADDWDZ 4611 +VPMADDWDZrm 4612 +VPMADDWDZrmk 4613 +VPMADDWDZrmkz 4614 +VPMADDWDZrr 4615 +VPMADDWDZrrk 4616 +VPMADDWDZrrkz 4617 +VPMADDWDrm 4618 +VPMADDWDrr 4619 +VPMASKMOVDYmr 4620 +VPMASKMOVDYrm 4621 +VPMASKMOVDmr 4622 +VPMASKMOVDrm 4623 +VPMASKMOVQYmr 4624 +VPMASKMOVQYrm 4625 +VPMASKMOVQmr 4626 +VPMASKMOVQrm 4627 +VPMAXSBYrm 4628 +VPMAXSBYrr 4629 +VPMAXSBZ 4630 +VPMAXSBZrm 4631 +VPMAXSBZrmk 4632 +VPMAXSBZrmkz 4633 +VPMAXSBZrr 4634 +VPMAXSBZrrk 4635 +VPMAXSBZrrkz 4636 +VPMAXSBrm 4637 +VPMAXSBrr 4638 +VPMAXSDYrm 4639 +VPMAXSDYrr 4640 +VPMAXSDZ 4641 +VPMAXSDZrm 4642 +VPMAXSDZrmb 4643 +VPMAXSDZrmbk 4644 +VPMAXSDZrmbkz 4645 +VPMAXSDZrmk 4646 +VPMAXSDZrmkz 4647 +VPMAXSDZrr 4648 +VPMAXSDZrrk 4649 +VPMAXSDZrrkz 4650 +VPMAXSDrm 4651 +VPMAXSDrr 4652 +VPMAXSQZ 4653 +VPMAXSQZrm 4654 +VPMAXSQZrmb 4655 +VPMAXSQZrmbk 4656 +VPMAXSQZrmbkz 4657 +VPMAXSQZrmk 4658 +VPMAXSQZrmkz 4659 +VPMAXSQZrr 4660 +VPMAXSQZrrk 4661 +VPMAXSQZrrkz 4662 +VPMAXSWYrm 4663 +VPMAXSWYrr 4664 +VPMAXSWZ 4665 +VPMAXSWZrm 4666 +VPMAXSWZrmk 4667 +VPMAXSWZrmkz 4668 +VPMAXSWZrr 4669 +VPMAXSWZrrk 4670 +VPMAXSWZrrkz 4671 +VPMAXSWrm 4672 +VPMAXSWrr 4673 +VPMAXUBYrm 4674 +VPMAXUBYrr 4675 +VPMAXUBZ 4676 +VPMAXUBZrm 4677 +VPMAXUBZrmk 4678 +VPMAXUBZrmkz 4679 +VPMAXUBZrr 4680 +VPMAXUBZrrk 4681 +VPMAXUBZrrkz 4682 +VPMAXUBrm 4683 +VPMAXUBrr 4684 +VPMAXUDYrm 4685 +VPMAXUDYrr 4686 +VPMAXUDZ 4687 +VPMAXUDZrm 4688 +VPMAXUDZrmb 4689 +VPMAXUDZrmbk 4690 +VPMAXUDZrmbkz 4691 +VPMAXUDZrmk 4692 +VPMAXUDZrmkz 4693 +VPMAXUDZrr 4694 +VPMAXUDZrrk 4695 +VPMAXUDZrrkz 4696 +VPMAXUDrm 4697 
+VPMAXUDrr 4698 +VPMAXUQZ 4699 +VPMAXUQZrm 4700 +VPMAXUQZrmb 4701 +VPMAXUQZrmbk 4702 +VPMAXUQZrmbkz 4703 +VPMAXUQZrmk 4704 +VPMAXUQZrmkz 4705 +VPMAXUQZrr 4706 +VPMAXUQZrrk 4707 +VPMAXUQZrrkz 4708 +VPMAXUWYrm 4709 +VPMAXUWYrr 4710 +VPMAXUWZ 4711 +VPMAXUWZrm 4712 +VPMAXUWZrmk 4713 +VPMAXUWZrmkz 4714 +VPMAXUWZrr 4715 +VPMAXUWZrrk 4716 +VPMAXUWZrrkz 4717 +VPMAXUWrm 4718 +VPMAXUWrr 4719 +VPMINSBYrm 4720 +VPMINSBYrr 4721 +VPMINSBZ 4722 +VPMINSBZrm 4723 +VPMINSBZrmk 4724 +VPMINSBZrmkz 4725 +VPMINSBZrr 4726 +VPMINSBZrrk 4727 +VPMINSBZrrkz 4728 +VPMINSBrm 4729 +VPMINSBrr 4730 +VPMINSDYrm 4731 +VPMINSDYrr 4732 +VPMINSDZ 4733 +VPMINSDZrm 4734 +VPMINSDZrmb 4735 +VPMINSDZrmbk 4736 +VPMINSDZrmbkz 4737 +VPMINSDZrmk 4738 +VPMINSDZrmkz 4739 +VPMINSDZrr 4740 +VPMINSDZrrk 4741 +VPMINSDZrrkz 4742 +VPMINSDrm 4743 +VPMINSDrr 4744 +VPMINSQZ 4745 +VPMINSQZrm 4746 +VPMINSQZrmb 4747 +VPMINSQZrmbk 4748 +VPMINSQZrmbkz 4749 +VPMINSQZrmk 4750 +VPMINSQZrmkz 4751 +VPMINSQZrr 4752 +VPMINSQZrrk 4753 +VPMINSQZrrkz 4754 +VPMINSWYrm 4755 +VPMINSWYrr 4756 +VPMINSWZ 4757 +VPMINSWZrm 4758 +VPMINSWZrmk 4759 +VPMINSWZrmkz 4760 +VPMINSWZrr 4761 +VPMINSWZrrk 4762 +VPMINSWZrrkz 4763 +VPMINSWrm 4764 +VPMINSWrr 4765 +VPMINUBYrm 4766 +VPMINUBYrr 4767 +VPMINUBZ 4768 +VPMINUBZrm 4769 +VPMINUBZrmk 4770 +VPMINUBZrmkz 4771 +VPMINUBZrr 4772 +VPMINUBZrrk 4773 +VPMINUBZrrkz 4774 +VPMINUBrm 4775 +VPMINUBrr 4776 +VPMINUDYrm 4777 +VPMINUDYrr 4778 +VPMINUDZ 4779 +VPMINUDZrm 4780 +VPMINUDZrmb 4781 +VPMINUDZrmbk 4782 +VPMINUDZrmbkz 4783 +VPMINUDZrmk 4784 +VPMINUDZrmkz 4785 +VPMINUDZrr 4786 +VPMINUDZrrk 4787 +VPMINUDZrrkz 4788 +VPMINUDrm 4789 +VPMINUDrr 4790 +VPMINUQZ 4791 +VPMINUQZrm 4792 +VPMINUQZrmb 4793 +VPMINUQZrmbk 4794 +VPMINUQZrmbkz 4795 +VPMINUQZrmk 4796 +VPMINUQZrmkz 4797 +VPMINUQZrr 4798 +VPMINUQZrrk 4799 +VPMINUQZrrkz 4800 +VPMINUWYrm 4801 +VPMINUWYrr 4802 +VPMINUWZ 4803 +VPMINUWZrm 4804 +VPMINUWZrmk 4805 +VPMINUWZrmkz 4806 +VPMINUWZrr 4807 +VPMINUWZrrk 4808 +VPMINUWZrrkz 4809 +VPMINUWrm 4810 +VPMINUWrr 4811 +VPMOVB 4812 +VPMOVD 4813 +VPMOVDBZ 4814 +VPMOVDBZmr 4815 +VPMOVDBZmrk 4816 +VPMOVDBZrr 4817 +VPMOVDBZrrk 4818 +VPMOVDBZrrkz 4819 +VPMOVDWZ 4820 +VPMOVDWZmr 4821 +VPMOVDWZmrk 4822 +VPMOVDWZrr 4823 +VPMOVDWZrrk 4824 +VPMOVDWZrrkz 4825 +VPMOVM 4826 +VPMOVMSKBYrr 4827 +VPMOVMSKBrr 4828 +VPMOVQ 4829 +VPMOVQBZ 4830 +VPMOVQBZmr 4831 +VPMOVQBZmrk 4832 +VPMOVQBZrr 4833 +VPMOVQBZrrk 4834 +VPMOVQBZrrkz 4835 +VPMOVQDZ 4836 +VPMOVQDZmr 4837 +VPMOVQDZmrk 4838 +VPMOVQDZrr 4839 +VPMOVQDZrrk 4840 +VPMOVQDZrrkz 4841 +VPMOVQWZ 4842 +VPMOVQWZmr 4843 +VPMOVQWZmrk 4844 +VPMOVQWZrr 4845 +VPMOVQWZrrk 4846 +VPMOVQWZrrkz 4847 +VPMOVSDBZ 4848 +VPMOVSDBZmr 4849 +VPMOVSDBZmrk 4850 +VPMOVSDBZrr 4851 +VPMOVSDBZrrk 4852 +VPMOVSDBZrrkz 4853 +VPMOVSDWZ 4854 +VPMOVSDWZmr 4855 +VPMOVSDWZmrk 4856 +VPMOVSDWZrr 4857 +VPMOVSDWZrrk 4858 +VPMOVSDWZrrkz 4859 +VPMOVSQBZ 4860 +VPMOVSQBZmr 4861 +VPMOVSQBZmrk 4862 +VPMOVSQBZrr 4863 +VPMOVSQBZrrk 4864 +VPMOVSQBZrrkz 4865 +VPMOVSQDZ 4866 +VPMOVSQDZmr 4867 +VPMOVSQDZmrk 4868 +VPMOVSQDZrr 4869 +VPMOVSQDZrrk 4870 +VPMOVSQDZrrkz 4871 +VPMOVSQWZ 4872 +VPMOVSQWZmr 4873 +VPMOVSQWZmrk 4874 +VPMOVSQWZrr 4875 +VPMOVSQWZrrk 4876 +VPMOVSQWZrrkz 4877 +VPMOVSWBZ 4878 +VPMOVSWBZmr 4879 +VPMOVSWBZmrk 4880 +VPMOVSWBZrr 4881 +VPMOVSWBZrrk 4882 +VPMOVSWBZrrkz 4883 +VPMOVSXBDYrm 4884 +VPMOVSXBDYrr 4885 +VPMOVSXBDZ 4886 +VPMOVSXBDZrm 4887 +VPMOVSXBDZrmk 4888 +VPMOVSXBDZrmkz 4889 +VPMOVSXBDZrr 4890 +VPMOVSXBDZrrk 4891 +VPMOVSXBDZrrkz 4892 +VPMOVSXBDrm 4893 +VPMOVSXBDrr 4894 +VPMOVSXBQYrm 4895 +VPMOVSXBQYrr 4896 +VPMOVSXBQZ 4897 +VPMOVSXBQZrm 4898 
+VPMOVSXBQZrmk 4899 +VPMOVSXBQZrmkz 4900 +VPMOVSXBQZrr 4901 +VPMOVSXBQZrrk 4902 +VPMOVSXBQZrrkz 4903 +VPMOVSXBQrm 4904 +VPMOVSXBQrr 4905 +VPMOVSXBWYrm 4906 +VPMOVSXBWYrr 4907 +VPMOVSXBWZ 4908 +VPMOVSXBWZrm 4909 +VPMOVSXBWZrmk 4910 +VPMOVSXBWZrmkz 4911 +VPMOVSXBWZrr 4912 +VPMOVSXBWZrrk 4913 +VPMOVSXBWZrrkz 4914 +VPMOVSXBWrm 4915 +VPMOVSXBWrr 4916 +VPMOVSXDQYrm 4917 +VPMOVSXDQYrr 4918 +VPMOVSXDQZ 4919 +VPMOVSXDQZrm 4920 +VPMOVSXDQZrmk 4921 +VPMOVSXDQZrmkz 4922 +VPMOVSXDQZrr 4923 +VPMOVSXDQZrrk 4924 +VPMOVSXDQZrrkz 4925 +VPMOVSXDQrm 4926 +VPMOVSXDQrr 4927 +VPMOVSXWDYrm 4928 +VPMOVSXWDYrr 4929 +VPMOVSXWDZ 4930 +VPMOVSXWDZrm 4931 +VPMOVSXWDZrmk 4932 +VPMOVSXWDZrmkz 4933 +VPMOVSXWDZrr 4934 +VPMOVSXWDZrrk 4935 +VPMOVSXWDZrrkz 4936 +VPMOVSXWDrm 4937 +VPMOVSXWDrr 4938 +VPMOVSXWQYrm 4939 +VPMOVSXWQYrr 4940 +VPMOVSXWQZ 4941 +VPMOVSXWQZrm 4942 +VPMOVSXWQZrmk 4943 +VPMOVSXWQZrmkz 4944 +VPMOVSXWQZrr 4945 +VPMOVSXWQZrrk 4946 +VPMOVSXWQZrrkz 4947 +VPMOVSXWQrm 4948 +VPMOVSXWQrr 4949 +VPMOVUSDBZ 4950 +VPMOVUSDBZmr 4951 +VPMOVUSDBZmrk 4952 +VPMOVUSDBZrr 4953 +VPMOVUSDBZrrk 4954 +VPMOVUSDBZrrkz 4955 +VPMOVUSDWZ 4956 +VPMOVUSDWZmr 4957 +VPMOVUSDWZmrk 4958 +VPMOVUSDWZrr 4959 +VPMOVUSDWZrrk 4960 +VPMOVUSDWZrrkz 4961 +VPMOVUSQBZ 4962 +VPMOVUSQBZmr 4963 +VPMOVUSQBZmrk 4964 +VPMOVUSQBZrr 4965 +VPMOVUSQBZrrk 4966 +VPMOVUSQBZrrkz 4967 +VPMOVUSQDZ 4968 +VPMOVUSQDZmr 4969 +VPMOVUSQDZmrk 4970 +VPMOVUSQDZrr 4971 +VPMOVUSQDZrrk 4972 +VPMOVUSQDZrrkz 4973 +VPMOVUSQWZ 4974 +VPMOVUSQWZmr 4975 +VPMOVUSQWZmrk 4976 +VPMOVUSQWZrr 4977 +VPMOVUSQWZrrk 4978 +VPMOVUSQWZrrkz 4979 +VPMOVUSWBZ 4980 +VPMOVUSWBZmr 4981 +VPMOVUSWBZmrk 4982 +VPMOVUSWBZrr 4983 +VPMOVUSWBZrrk 4984 +VPMOVUSWBZrrkz 4985 +VPMOVW 4986 +VPMOVWBZ 4987 +VPMOVWBZmr 4988 +VPMOVWBZmrk 4989 +VPMOVWBZrr 4990 +VPMOVWBZrrk 4991 +VPMOVWBZrrkz 4992 +VPMOVZXBDYrm 4993 +VPMOVZXBDYrr 4994 +VPMOVZXBDZ 4995 +VPMOVZXBDZrm 4996 +VPMOVZXBDZrmk 4997 +VPMOVZXBDZrmkz 4998 +VPMOVZXBDZrr 4999 +VPMOVZXBDZrrk 5000 +VPMOVZXBDZrrkz 5001 +VPMOVZXBDrm 5002 +VPMOVZXBDrr 5003 +VPMOVZXBQYrm 5004 +VPMOVZXBQYrr 5005 +VPMOVZXBQZ 5006 +VPMOVZXBQZrm 5007 +VPMOVZXBQZrmk 5008 +VPMOVZXBQZrmkz 5009 +VPMOVZXBQZrr 5010 +VPMOVZXBQZrrk 5011 +VPMOVZXBQZrrkz 5012 +VPMOVZXBQrm 5013 +VPMOVZXBQrr 5014 +VPMOVZXBWYrm 5015 +VPMOVZXBWYrr 5016 +VPMOVZXBWZ 5017 +VPMOVZXBWZrm 5018 +VPMOVZXBWZrmk 5019 +VPMOVZXBWZrmkz 5020 +VPMOVZXBWZrr 5021 +VPMOVZXBWZrrk 5022 +VPMOVZXBWZrrkz 5023 +VPMOVZXBWrm 5024 +VPMOVZXBWrr 5025 +VPMOVZXDQYrm 5026 +VPMOVZXDQYrr 5027 +VPMOVZXDQZ 5028 +VPMOVZXDQZrm 5029 +VPMOVZXDQZrmk 5030 +VPMOVZXDQZrmkz 5031 +VPMOVZXDQZrr 5032 +VPMOVZXDQZrrk 5033 +VPMOVZXDQZrrkz 5034 +VPMOVZXDQrm 5035 +VPMOVZXDQrr 5036 +VPMOVZXWDYrm 5037 +VPMOVZXWDYrr 5038 +VPMOVZXWDZ 5039 +VPMOVZXWDZrm 5040 +VPMOVZXWDZrmk 5041 +VPMOVZXWDZrmkz 5042 +VPMOVZXWDZrr 5043 +VPMOVZXWDZrrk 5044 +VPMOVZXWDZrrkz 5045 +VPMOVZXWDrm 5046 +VPMOVZXWDrr 5047 +VPMOVZXWQYrm 5048 +VPMOVZXWQYrr 5049 +VPMOVZXWQZ 5050 +VPMOVZXWQZrm 5051 +VPMOVZXWQZrmk 5052 +VPMOVZXWQZrmkz 5053 +VPMOVZXWQZrr 5054 +VPMOVZXWQZrrk 5055 +VPMOVZXWQZrrkz 5056 +VPMOVZXWQrm 5057 +VPMOVZXWQrr 5058 +VPMULDQYrm 5059 +VPMULDQYrr 5060 +VPMULDQZ 5061 +VPMULDQZrm 5062 +VPMULDQZrmb 5063 +VPMULDQZrmbk 5064 +VPMULDQZrmbkz 5065 +VPMULDQZrmk 5066 +VPMULDQZrmkz 5067 +VPMULDQZrr 5068 +VPMULDQZrrk 5069 +VPMULDQZrrkz 5070 +VPMULDQrm 5071 +VPMULDQrr 5072 +VPMULHRSWYrm 5073 +VPMULHRSWYrr 5074 +VPMULHRSWZ 5075 +VPMULHRSWZrm 5076 +VPMULHRSWZrmk 5077 +VPMULHRSWZrmkz 5078 +VPMULHRSWZrr 5079 +VPMULHRSWZrrk 5080 +VPMULHRSWZrrkz 5081 +VPMULHRSWrm 5082 +VPMULHRSWrr 5083 +VPMULHUWYrm 5084 +VPMULHUWYrr 5085 
+VPMULHUWZ 5086 +VPMULHUWZrm 5087 +VPMULHUWZrmk 5088 +VPMULHUWZrmkz 5089 +VPMULHUWZrr 5090 +VPMULHUWZrrk 5091 +VPMULHUWZrrkz 5092 +VPMULHUWrm 5093 +VPMULHUWrr 5094 +VPMULHWYrm 5095 +VPMULHWYrr 5096 +VPMULHWZ 5097 +VPMULHWZrm 5098 +VPMULHWZrmk 5099 +VPMULHWZrmkz 5100 +VPMULHWZrr 5101 +VPMULHWZrrk 5102 +VPMULHWZrrkz 5103 +VPMULHWrm 5104 +VPMULHWrr 5105 +VPMULLDYrm 5106 +VPMULLDYrr 5107 +VPMULLDZ 5108 +VPMULLDZrm 5109 +VPMULLDZrmb 5110 +VPMULLDZrmbk 5111 +VPMULLDZrmbkz 5112 +VPMULLDZrmk 5113 +VPMULLDZrmkz 5114 +VPMULLDZrr 5115 +VPMULLDZrrk 5116 +VPMULLDZrrkz 5117 +VPMULLDrm 5118 +VPMULLDrr 5119 +VPMULLQZ 5120 +VPMULLQZrm 5121 +VPMULLQZrmb 5122 +VPMULLQZrmbk 5123 +VPMULLQZrmbkz 5124 +VPMULLQZrmk 5125 +VPMULLQZrmkz 5126 +VPMULLQZrr 5127 +VPMULLQZrrk 5128 +VPMULLQZrrkz 5129 +VPMULLWYrm 5130 +VPMULLWYrr 5131 +VPMULLWZ 5132 +VPMULLWZrm 5133 +VPMULLWZrmk 5134 +VPMULLWZrmkz 5135 +VPMULLWZrr 5136 +VPMULLWZrrk 5137 +VPMULLWZrrkz 5138 +VPMULLWrm 5139 +VPMULLWrr 5140 +VPMULTISHIFTQBZ 5141 +VPMULTISHIFTQBZrm 5142 +VPMULTISHIFTQBZrmb 5143 +VPMULTISHIFTQBZrmbk 5144 +VPMULTISHIFTQBZrmbkz 5145 +VPMULTISHIFTQBZrmk 5146 +VPMULTISHIFTQBZrmkz 5147 +VPMULTISHIFTQBZrr 5148 +VPMULTISHIFTQBZrrk 5149 +VPMULTISHIFTQBZrrkz 5150 +VPMULUDQYrm 5151 +VPMULUDQYrr 5152 +VPMULUDQZ 5153 +VPMULUDQZrm 5154 +VPMULUDQZrmb 5155 +VPMULUDQZrmbk 5156 +VPMULUDQZrmbkz 5157 +VPMULUDQZrmk 5158 +VPMULUDQZrmkz 5159 +VPMULUDQZrr 5160 +VPMULUDQZrrk 5161 +VPMULUDQZrrkz 5162 +VPMULUDQrm 5163 +VPMULUDQrr 5164 +VPOPCNTBZ 5165 +VPOPCNTBZrm 5166 +VPOPCNTBZrmk 5167 +VPOPCNTBZrmkz 5168 +VPOPCNTBZrr 5169 +VPOPCNTBZrrk 5170 +VPOPCNTBZrrkz 5171 +VPOPCNTDZ 5172 +VPOPCNTDZrm 5173 +VPOPCNTDZrmb 5174 +VPOPCNTDZrmbk 5175 +VPOPCNTDZrmbkz 5176 +VPOPCNTDZrmk 5177 +VPOPCNTDZrmkz 5178 +VPOPCNTDZrr 5179 +VPOPCNTDZrrk 5180 +VPOPCNTDZrrkz 5181 +VPOPCNTQZ 5182 +VPOPCNTQZrm 5183 +VPOPCNTQZrmb 5184 +VPOPCNTQZrmbk 5185 +VPOPCNTQZrmbkz 5186 +VPOPCNTQZrmk 5187 +VPOPCNTQZrmkz 5188 +VPOPCNTQZrr 5189 +VPOPCNTQZrrk 5190 +VPOPCNTQZrrkz 5191 +VPOPCNTWZ 5192 +VPOPCNTWZrm 5193 +VPOPCNTWZrmk 5194 +VPOPCNTWZrmkz 5195 +VPOPCNTWZrr 5196 +VPOPCNTWZrrk 5197 +VPOPCNTWZrrkz 5198 +VPORDZ 5199 +VPORDZrm 5200 +VPORDZrmb 5201 +VPORDZrmbk 5202 +VPORDZrmbkz 5203 +VPORDZrmk 5204 +VPORDZrmkz 5205 +VPORDZrr 5206 +VPORDZrrk 5207 +VPORDZrrkz 5208 +VPORQZ 5209 +VPORQZrm 5210 +VPORQZrmb 5211 +VPORQZrmbk 5212 +VPORQZrmbkz 5213 +VPORQZrmk 5214 +VPORQZrmkz 5215 +VPORQZrr 5216 +VPORQZrrk 5217 +VPORQZrrkz 5218 +VPORYrm 5219 +VPORYrr 5220 +VPORrm 5221 +VPORrr 5222 +VPPERMrmr 5223 +VPPERMrrm 5224 +VPPERMrrr 5225 +VPPERMrrr_REV 5226 +VPROLDZ 5227 +VPROLDZmbi 5228 +VPROLDZmbik 5229 +VPROLDZmbikz 5230 +VPROLDZmi 5231 +VPROLDZmik 5232 +VPROLDZmikz 5233 +VPROLDZri 5234 +VPROLDZrik 5235 +VPROLDZrikz 5236 +VPROLQZ 5237 +VPROLQZmbi 5238 +VPROLQZmbik 5239 +VPROLQZmbikz 5240 +VPROLQZmi 5241 +VPROLQZmik 5242 +VPROLQZmikz 5243 +VPROLQZri 5244 +VPROLQZrik 5245 +VPROLQZrikz 5246 +VPROLVDZ 5247 +VPROLVDZrm 5248 +VPROLVDZrmb 5249 +VPROLVDZrmbk 5250 +VPROLVDZrmbkz 5251 +VPROLVDZrmk 5252 +VPROLVDZrmkz 5253 +VPROLVDZrr 5254 +VPROLVDZrrk 5255 +VPROLVDZrrkz 5256 +VPROLVQZ 5257 +VPROLVQZrm 5258 +VPROLVQZrmb 5259 +VPROLVQZrmbk 5260 +VPROLVQZrmbkz 5261 +VPROLVQZrmk 5262 +VPROLVQZrmkz 5263 +VPROLVQZrr 5264 +VPROLVQZrrk 5265 +VPROLVQZrrkz 5266 +VPRORDZ 5267 +VPRORDZmbi 5268 +VPRORDZmbik 5269 +VPRORDZmbikz 5270 +VPRORDZmi 5271 +VPRORDZmik 5272 +VPRORDZmikz 5273 +VPRORDZri 5274 +VPRORDZrik 5275 +VPRORDZrikz 5276 +VPRORQZ 5277 +VPRORQZmbi 5278 +VPRORQZmbik 5279 +VPRORQZmbikz 5280 +VPRORQZmi 5281 +VPRORQZmik 5282 +VPRORQZmikz 5283 
+VPRORQZri 5284 +VPRORQZrik 5285 +VPRORQZrikz 5286 +VPRORVDZ 5287 +VPRORVDZrm 5288 +VPRORVDZrmb 5289 +VPRORVDZrmbk 5290 +VPRORVDZrmbkz 5291 +VPRORVDZrmk 5292 +VPRORVDZrmkz 5293 +VPRORVDZrr 5294 +VPRORVDZrrk 5295 +VPRORVDZrrkz 5296 +VPRORVQZ 5297 +VPRORVQZrm 5298 +VPRORVQZrmb 5299 +VPRORVQZrmbk 5300 +VPRORVQZrmbkz 5301 +VPRORVQZrmk 5302 +VPRORVQZrmkz 5303 +VPRORVQZrr 5304 +VPRORVQZrrk 5305 +VPRORVQZrrkz 5306 +VPROTBmi 5307 +VPROTBmr 5308 +VPROTBri 5309 +VPROTBrm 5310 +VPROTBrr 5311 +VPROTBrr_REV 5312 +VPROTDmi 5313 +VPROTDmr 5314 +VPROTDri 5315 +VPROTDrm 5316 +VPROTDrr 5317 +VPROTDrr_REV 5318 +VPROTQmi 5319 +VPROTQmr 5320 +VPROTQri 5321 +VPROTQrm 5322 +VPROTQrr 5323 +VPROTQrr_REV 5324 +VPROTWmi 5325 +VPROTWmr 5326 +VPROTWri 5327 +VPROTWrm 5328 +VPROTWrr 5329 +VPROTWrr_REV 5330 +VPSADBWYrm 5331 +VPSADBWYrr 5332 +VPSADBWZ 5333 +VPSADBWZrm 5334 +VPSADBWZrr 5335 +VPSADBWrm 5336 +VPSADBWrr 5337 +VPSCATTERDDZ 5338 +VPSCATTERDDZmr 5339 +VPSCATTERDQZ 5340 +VPSCATTERDQZmr 5341 +VPSCATTERQDZ 5342 +VPSCATTERQDZmr 5343 +VPSCATTERQQZ 5344 +VPSCATTERQQZmr 5345 +VPSHABmr 5346 +VPSHABrm 5347 +VPSHABrr 5348 +VPSHABrr_REV 5349 +VPSHADmr 5350 +VPSHADrm 5351 +VPSHADrr 5352 +VPSHADrr_REV 5353 +VPSHAQmr 5354 +VPSHAQrm 5355 +VPSHAQrr 5356 +VPSHAQrr_REV 5357 +VPSHAWmr 5358 +VPSHAWrm 5359 +VPSHAWrr 5360 +VPSHAWrr_REV 5361 +VPSHLBmr 5362 +VPSHLBrm 5363 +VPSHLBrr 5364 +VPSHLBrr_REV 5365 +VPSHLDDZ 5366 +VPSHLDDZrmbi 5367 +VPSHLDDZrmbik 5368 +VPSHLDDZrmbikz 5369 +VPSHLDDZrmi 5370 +VPSHLDDZrmik 5371 +VPSHLDDZrmikz 5372 +VPSHLDDZrri 5373 +VPSHLDDZrrik 5374 +VPSHLDDZrrikz 5375 +VPSHLDQZ 5376 +VPSHLDQZrmbi 5377 +VPSHLDQZrmbik 5378 +VPSHLDQZrmbikz 5379 +VPSHLDQZrmi 5380 +VPSHLDQZrmik 5381 +VPSHLDQZrmikz 5382 +VPSHLDQZrri 5383 +VPSHLDQZrrik 5384 +VPSHLDQZrrikz 5385 +VPSHLDVDZ 5386 +VPSHLDVDZm 5387 +VPSHLDVDZmb 5388 +VPSHLDVDZmbk 5389 +VPSHLDVDZmbkz 5390 +VPSHLDVDZmk 5391 +VPSHLDVDZmkz 5392 +VPSHLDVDZr 5393 +VPSHLDVDZrk 5394 +VPSHLDVDZrkz 5395 +VPSHLDVQZ 5396 +VPSHLDVQZm 5397 +VPSHLDVQZmb 5398 +VPSHLDVQZmbk 5399 +VPSHLDVQZmbkz 5400 +VPSHLDVQZmk 5401 +VPSHLDVQZmkz 5402 +VPSHLDVQZr 5403 +VPSHLDVQZrk 5404 +VPSHLDVQZrkz 5405 +VPSHLDVWZ 5406 +VPSHLDVWZm 5407 +VPSHLDVWZmk 5408 +VPSHLDVWZmkz 5409 +VPSHLDVWZr 5410 +VPSHLDVWZrk 5411 +VPSHLDVWZrkz 5412 +VPSHLDWZ 5413 +VPSHLDWZrmi 5414 +VPSHLDWZrmik 5415 +VPSHLDWZrmikz 5416 +VPSHLDWZrri 5417 +VPSHLDWZrrik 5418 +VPSHLDWZrrikz 5419 +VPSHLDmr 5420 +VPSHLDrm 5421 +VPSHLDrr 5422 +VPSHLDrr_REV 5423 +VPSHLQmr 5424 +VPSHLQrm 5425 +VPSHLQrr 5426 +VPSHLQrr_REV 5427 +VPSHLWmr 5428 +VPSHLWrm 5429 +VPSHLWrr 5430 +VPSHLWrr_REV 5431 +VPSHRDDZ 5432 +VPSHRDDZrmbi 5433 +VPSHRDDZrmbik 5434 +VPSHRDDZrmbikz 5435 +VPSHRDDZrmi 5436 +VPSHRDDZrmik 5437 +VPSHRDDZrmikz 5438 +VPSHRDDZrri 5439 +VPSHRDDZrrik 5440 +VPSHRDDZrrikz 5441 +VPSHRDQZ 5442 +VPSHRDQZrmbi 5443 +VPSHRDQZrmbik 5444 +VPSHRDQZrmbikz 5445 +VPSHRDQZrmi 5446 +VPSHRDQZrmik 5447 +VPSHRDQZrmikz 5448 +VPSHRDQZrri 5449 +VPSHRDQZrrik 5450 +VPSHRDQZrrikz 5451 +VPSHRDVDZ 5452 +VPSHRDVDZm 5453 +VPSHRDVDZmb 5454 +VPSHRDVDZmbk 5455 +VPSHRDVDZmbkz 5456 +VPSHRDVDZmk 5457 +VPSHRDVDZmkz 5458 +VPSHRDVDZr 5459 +VPSHRDVDZrk 5460 +VPSHRDVDZrkz 5461 +VPSHRDVQZ 5462 +VPSHRDVQZm 5463 +VPSHRDVQZmb 5464 +VPSHRDVQZmbk 5465 +VPSHRDVQZmbkz 5466 +VPSHRDVQZmk 5467 +VPSHRDVQZmkz 5468 +VPSHRDVQZr 5469 +VPSHRDVQZrk 5470 +VPSHRDVQZrkz 5471 +VPSHRDVWZ 5472 +VPSHRDVWZm 5473 +VPSHRDVWZmk 5474 +VPSHRDVWZmkz 5475 +VPSHRDVWZr 5476 +VPSHRDVWZrk 5477 +VPSHRDVWZrkz 5478 +VPSHRDWZ 5479 +VPSHRDWZrmi 5480 +VPSHRDWZrmik 5481 +VPSHRDWZrmikz 5482 +VPSHRDWZrri 5483 +VPSHRDWZrrik 5484 +VPSHRDWZrrikz 
5485 +VPSHUFBITQMBZ 5486 +VPSHUFBITQMBZrm 5487 +VPSHUFBITQMBZrmk 5488 +VPSHUFBITQMBZrr 5489 +VPSHUFBITQMBZrrk 5490 +VPSHUFBYrm 5491 +VPSHUFBYrr 5492 +VPSHUFBZ 5493 +VPSHUFBZrm 5494 +VPSHUFBZrmk 5495 +VPSHUFBZrmkz 5496 +VPSHUFBZrr 5497 +VPSHUFBZrrk 5498 +VPSHUFBZrrkz 5499 +VPSHUFBrm 5500 +VPSHUFBrr 5501 +VPSHUFDYmi 5502 +VPSHUFDYri 5503 +VPSHUFDZ 5504 +VPSHUFDZmbi 5505 +VPSHUFDZmbik 5506 +VPSHUFDZmbikz 5507 +VPSHUFDZmi 5508 +VPSHUFDZmik 5509 +VPSHUFDZmikz 5510 +VPSHUFDZri 5511 +VPSHUFDZrik 5512 +VPSHUFDZrikz 5513 +VPSHUFDmi 5514 +VPSHUFDri 5515 +VPSHUFHWYmi 5516 +VPSHUFHWYri 5517 +VPSHUFHWZ 5518 +VPSHUFHWZmi 5519 +VPSHUFHWZmik 5520 +VPSHUFHWZmikz 5521 +VPSHUFHWZri 5522 +VPSHUFHWZrik 5523 +VPSHUFHWZrikz 5524 +VPSHUFHWmi 5525 +VPSHUFHWri 5526 +VPSHUFLWYmi 5527 +VPSHUFLWYri 5528 +VPSHUFLWZ 5529 +VPSHUFLWZmi 5530 +VPSHUFLWZmik 5531 +VPSHUFLWZmikz 5532 +VPSHUFLWZri 5533 +VPSHUFLWZrik 5534 +VPSHUFLWZrikz 5535 +VPSHUFLWmi 5536 +VPSHUFLWri 5537 +VPSIGNBYrm 5538 +VPSIGNBYrr 5539 +VPSIGNBrm 5540 +VPSIGNBrr 5541 +VPSIGNDYrm 5542 +VPSIGNDYrr 5543 +VPSIGNDrm 5544 +VPSIGNDrr 5545 +VPSIGNWYrm 5546 +VPSIGNWYrr 5547 +VPSIGNWrm 5548 +VPSIGNWrr 5549 +VPSLLDQYri 5550 +VPSLLDQZ 5551 +VPSLLDQZmi 5552 +VPSLLDQZri 5553 +VPSLLDQri 5554 +VPSLLDYri 5555 +VPSLLDYrm 5556 +VPSLLDYrr 5557 +VPSLLDZ 5558 +VPSLLDZmbi 5559 +VPSLLDZmbik 5560 +VPSLLDZmbikz 5561 +VPSLLDZmi 5562 +VPSLLDZmik 5563 +VPSLLDZmikz 5564 +VPSLLDZri 5565 +VPSLLDZrik 5566 +VPSLLDZrikz 5567 +VPSLLDZrm 5568 +VPSLLDZrmk 5569 +VPSLLDZrmkz 5570 +VPSLLDZrr 5571 +VPSLLDZrrk 5572 +VPSLLDZrrkz 5573 +VPSLLDri 5574 +VPSLLDrm 5575 +VPSLLDrr 5576 +VPSLLQYri 5577 +VPSLLQYrm 5578 +VPSLLQYrr 5579 +VPSLLQZ 5580 +VPSLLQZmbi 5581 +VPSLLQZmbik 5582 +VPSLLQZmbikz 5583 +VPSLLQZmi 5584 +VPSLLQZmik 5585 +VPSLLQZmikz 5586 +VPSLLQZri 5587 +VPSLLQZrik 5588 +VPSLLQZrikz 5589 +VPSLLQZrm 5590 +VPSLLQZrmk 5591 +VPSLLQZrmkz 5592 +VPSLLQZrr 5593 +VPSLLQZrrk 5594 +VPSLLQZrrkz 5595 +VPSLLQri 5596 +VPSLLQrm 5597 +VPSLLQrr 5598 +VPSLLVDYrm 5599 +VPSLLVDYrr 5600 +VPSLLVDZ 5601 +VPSLLVDZrm 5602 +VPSLLVDZrmb 5603 +VPSLLVDZrmbk 5604 +VPSLLVDZrmbkz 5605 +VPSLLVDZrmk 5606 +VPSLLVDZrmkz 5607 +VPSLLVDZrr 5608 +VPSLLVDZrrk 5609 +VPSLLVDZrrkz 5610 +VPSLLVDrm 5611 +VPSLLVDrr 5612 +VPSLLVQYrm 5613 +VPSLLVQYrr 5614 +VPSLLVQZ 5615 +VPSLLVQZrm 5616 +VPSLLVQZrmb 5617 +VPSLLVQZrmbk 5618 +VPSLLVQZrmbkz 5619 +VPSLLVQZrmk 5620 +VPSLLVQZrmkz 5621 +VPSLLVQZrr 5622 +VPSLLVQZrrk 5623 +VPSLLVQZrrkz 5624 +VPSLLVQrm 5625 +VPSLLVQrr 5626 +VPSLLVWZ 5627 +VPSLLVWZrm 5628 +VPSLLVWZrmk 5629 +VPSLLVWZrmkz 5630 +VPSLLVWZrr 5631 +VPSLLVWZrrk 5632 +VPSLLVWZrrkz 5633 +VPSLLWYri 5634 +VPSLLWYrm 5635 +VPSLLWYrr 5636 +VPSLLWZ 5637 +VPSLLWZmi 5638 +VPSLLWZmik 5639 +VPSLLWZmikz 5640 +VPSLLWZri 5641 +VPSLLWZrik 5642 +VPSLLWZrikz 5643 +VPSLLWZrm 5644 +VPSLLWZrmk 5645 +VPSLLWZrmkz 5646 +VPSLLWZrr 5647 +VPSLLWZrrk 5648 +VPSLLWZrrkz 5649 +VPSLLWri 5650 +VPSLLWrm 5651 +VPSLLWrr 5652 +VPSRADYri 5653 +VPSRADYrm 5654 +VPSRADYrr 5655 +VPSRADZ 5656 +VPSRADZmbi 5657 +VPSRADZmbik 5658 +VPSRADZmbikz 5659 +VPSRADZmi 5660 +VPSRADZmik 5661 +VPSRADZmikz 5662 +VPSRADZri 5663 +VPSRADZrik 5664 +VPSRADZrikz 5665 +VPSRADZrm 5666 +VPSRADZrmk 5667 +VPSRADZrmkz 5668 +VPSRADZrr 5669 +VPSRADZrrk 5670 +VPSRADZrrkz 5671 +VPSRADri 5672 +VPSRADrm 5673 +VPSRADrr 5674 +VPSRAQZ 5675 +VPSRAQZmbi 5676 +VPSRAQZmbik 5677 +VPSRAQZmbikz 5678 +VPSRAQZmi 5679 +VPSRAQZmik 5680 +VPSRAQZmikz 5681 +VPSRAQZri 5682 +VPSRAQZrik 5683 +VPSRAQZrikz 5684 +VPSRAQZrm 5685 +VPSRAQZrmk 5686 +VPSRAQZrmkz 5687 +VPSRAQZrr 5688 +VPSRAQZrrk 5689 +VPSRAQZrrkz 5690 +VPSRAVDYrm 5691 +VPSRAVDYrr 
5692 +VPSRAVDZ 5693 +VPSRAVDZrm 5694 +VPSRAVDZrmb 5695 +VPSRAVDZrmbk 5696 +VPSRAVDZrmbkz 5697 +VPSRAVDZrmk 5698 +VPSRAVDZrmkz 5699 +VPSRAVDZrr 5700 +VPSRAVDZrrk 5701 +VPSRAVDZrrkz 5702 +VPSRAVDrm 5703 +VPSRAVDrr 5704 +VPSRAVQZ 5705 +VPSRAVQZrm 5706 +VPSRAVQZrmb 5707 +VPSRAVQZrmbk 5708 +VPSRAVQZrmbkz 5709 +VPSRAVQZrmk 5710 +VPSRAVQZrmkz 5711 +VPSRAVQZrr 5712 +VPSRAVQZrrk 5713 +VPSRAVQZrrkz 5714 +VPSRAVWZ 5715 +VPSRAVWZrm 5716 +VPSRAVWZrmk 5717 +VPSRAVWZrmkz 5718 +VPSRAVWZrr 5719 +VPSRAVWZrrk 5720 +VPSRAVWZrrkz 5721 +VPSRAWYri 5722 +VPSRAWYrm 5723 +VPSRAWYrr 5724 +VPSRAWZ 5725 +VPSRAWZmi 5726 +VPSRAWZmik 5727 +VPSRAWZmikz 5728 +VPSRAWZri 5729 +VPSRAWZrik 5730 +VPSRAWZrikz 5731 +VPSRAWZrm 5732 +VPSRAWZrmk 5733 +VPSRAWZrmkz 5734 +VPSRAWZrr 5735 +VPSRAWZrrk 5736 +VPSRAWZrrkz 5737 +VPSRAWri 5738 +VPSRAWrm 5739 +VPSRAWrr 5740 +VPSRLDQYri 5741 +VPSRLDQZ 5742 +VPSRLDQZmi 5743 +VPSRLDQZri 5744 +VPSRLDQri 5745 +VPSRLDYri 5746 +VPSRLDYrm 5747 +VPSRLDYrr 5748 +VPSRLDZ 5749 +VPSRLDZmbi 5750 +VPSRLDZmbik 5751 +VPSRLDZmbikz 5752 +VPSRLDZmi 5753 +VPSRLDZmik 5754 +VPSRLDZmikz 5755 +VPSRLDZri 5756 +VPSRLDZrik 5757 +VPSRLDZrikz 5758 +VPSRLDZrm 5759 +VPSRLDZrmk 5760 +VPSRLDZrmkz 5761 +VPSRLDZrr 5762 +VPSRLDZrrk 5763 +VPSRLDZrrkz 5764 +VPSRLDri 5765 +VPSRLDrm 5766 +VPSRLDrr 5767 +VPSRLQYri 5768 +VPSRLQYrm 5769 +VPSRLQYrr 5770 +VPSRLQZ 5771 +VPSRLQZmbi 5772 +VPSRLQZmbik 5773 +VPSRLQZmbikz 5774 +VPSRLQZmi 5775 +VPSRLQZmik 5776 +VPSRLQZmikz 5777 +VPSRLQZri 5778 +VPSRLQZrik 5779 +VPSRLQZrikz 5780 +VPSRLQZrm 5781 +VPSRLQZrmk 5782 +VPSRLQZrmkz 5783 +VPSRLQZrr 5784 +VPSRLQZrrk 5785 +VPSRLQZrrkz 5786 +VPSRLQri 5787 +VPSRLQrm 5788 +VPSRLQrr 5789 +VPSRLVDYrm 5790 +VPSRLVDYrr 5791 +VPSRLVDZ 5792 +VPSRLVDZrm 5793 +VPSRLVDZrmb 5794 +VPSRLVDZrmbk 5795 +VPSRLVDZrmbkz 5796 +VPSRLVDZrmk 5797 +VPSRLVDZrmkz 5798 +VPSRLVDZrr 5799 +VPSRLVDZrrk 5800 +VPSRLVDZrrkz 5801 +VPSRLVDrm 5802 +VPSRLVDrr 5803 +VPSRLVQYrm 5804 +VPSRLVQYrr 5805 +VPSRLVQZ 5806 +VPSRLVQZrm 5807 +VPSRLVQZrmb 5808 +VPSRLVQZrmbk 5809 +VPSRLVQZrmbkz 5810 +VPSRLVQZrmk 5811 +VPSRLVQZrmkz 5812 +VPSRLVQZrr 5813 +VPSRLVQZrrk 5814 +VPSRLVQZrrkz 5815 +VPSRLVQrm 5816 +VPSRLVQrr 5817 +VPSRLVWZ 5818 +VPSRLVWZrm 5819 +VPSRLVWZrmk 5820 +VPSRLVWZrmkz 5821 +VPSRLVWZrr 5822 +VPSRLVWZrrk 5823 +VPSRLVWZrrkz 5824 +VPSRLWYri 5825 +VPSRLWYrm 5826 +VPSRLWYrr 5827 +VPSRLWZ 5828 +VPSRLWZmi 5829 +VPSRLWZmik 5830 +VPSRLWZmikz 5831 +VPSRLWZri 5832 +VPSRLWZrik 5833 +VPSRLWZrikz 5834 +VPSRLWZrm 5835 +VPSRLWZrmk 5836 +VPSRLWZrmkz 5837 +VPSRLWZrr 5838 +VPSRLWZrrk 5839 +VPSRLWZrrkz 5840 +VPSRLWri 5841 +VPSRLWrm 5842 +VPSRLWrr 5843 +VPSUBBYrm 5844 +VPSUBBYrr 5845 +VPSUBBZ 5846 +VPSUBBZrm 5847 +VPSUBBZrmk 5848 +VPSUBBZrmkz 5849 +VPSUBBZrr 5850 +VPSUBBZrrk 5851 +VPSUBBZrrkz 5852 +VPSUBBrm 5853 +VPSUBBrr 5854 +VPSUBDYrm 5855 +VPSUBDYrr 5856 +VPSUBDZ 5857 +VPSUBDZrm 5858 +VPSUBDZrmb 5859 +VPSUBDZrmbk 5860 +VPSUBDZrmbkz 5861 +VPSUBDZrmk 5862 +VPSUBDZrmkz 5863 +VPSUBDZrr 5864 +VPSUBDZrrk 5865 +VPSUBDZrrkz 5866 +VPSUBDrm 5867 +VPSUBDrr 5868 +VPSUBQYrm 5869 +VPSUBQYrr 5870 +VPSUBQZ 5871 +VPSUBQZrm 5872 +VPSUBQZrmb 5873 +VPSUBQZrmbk 5874 +VPSUBQZrmbkz 5875 +VPSUBQZrmk 5876 +VPSUBQZrmkz 5877 +VPSUBQZrr 5878 +VPSUBQZrrk 5879 +VPSUBQZrrkz 5880 +VPSUBQrm 5881 +VPSUBQrr 5882 +VPSUBSBYrm 5883 +VPSUBSBYrr 5884 +VPSUBSBZ 5885 +VPSUBSBZrm 5886 +VPSUBSBZrmk 5887 +VPSUBSBZrmkz 5888 +VPSUBSBZrr 5889 +VPSUBSBZrrk 5890 +VPSUBSBZrrkz 5891 +VPSUBSBrm 5892 +VPSUBSBrr 5893 +VPSUBSWYrm 5894 +VPSUBSWYrr 5895 +VPSUBSWZ 5896 +VPSUBSWZrm 5897 +VPSUBSWZrmk 5898 +VPSUBSWZrmkz 5899 +VPSUBSWZrr 5900 +VPSUBSWZrrk 5901 +VPSUBSWZrrkz 
5902 +VPSUBSWrm 5903 +VPSUBSWrr 5904 +VPSUBUSBYrm 5905 +VPSUBUSBYrr 5906 +VPSUBUSBZ 5907 +VPSUBUSBZrm 5908 +VPSUBUSBZrmk 5909 +VPSUBUSBZrmkz 5910 +VPSUBUSBZrr 5911 +VPSUBUSBZrrk 5912 +VPSUBUSBZrrkz 5913 +VPSUBUSBrm 5914 +VPSUBUSBrr 5915 +VPSUBUSWYrm 5916 +VPSUBUSWYrr 5917 +VPSUBUSWZ 5918 +VPSUBUSWZrm 5919 +VPSUBUSWZrmk 5920 +VPSUBUSWZrmkz 5921 +VPSUBUSWZrr 5922 +VPSUBUSWZrrk 5923 +VPSUBUSWZrrkz 5924 +VPSUBUSWrm 5925 +VPSUBUSWrr 5926 +VPSUBWYrm 5927 +VPSUBWYrr 5928 +VPSUBWZ 5929 +VPSUBWZrm 5930 +VPSUBWZrmk 5931 +VPSUBWZrmkz 5932 +VPSUBWZrr 5933 +VPSUBWZrrk 5934 +VPSUBWZrrkz 5935 +VPSUBWrm 5936 +VPSUBWrr 5937 +VPTERNLOGDZ 5938 +VPTERNLOGDZrmbi 5939 +VPTERNLOGDZrmbik 5940 +VPTERNLOGDZrmbikz 5941 +VPTERNLOGDZrmi 5942 +VPTERNLOGDZrmik 5943 +VPTERNLOGDZrmikz 5944 +VPTERNLOGDZrri 5945 +VPTERNLOGDZrrik 5946 +VPTERNLOGDZrrikz 5947 +VPTERNLOGQZ 5948 +VPTERNLOGQZrmbi 5949 +VPTERNLOGQZrmbik 5950 +VPTERNLOGQZrmbikz 5951 +VPTERNLOGQZrmi 5952 +VPTERNLOGQZrmik 5953 +VPTERNLOGQZrmikz 5954 +VPTERNLOGQZrri 5955 +VPTERNLOGQZrrik 5956 +VPTERNLOGQZrrikz 5957 +VPTESTMBZ 5958 +VPTESTMBZrm 5959 +VPTESTMBZrmk 5960 +VPTESTMBZrr 5961 +VPTESTMBZrrk 5962 +VPTESTMDZ 5963 +VPTESTMDZrm 5964 +VPTESTMDZrmb 5965 +VPTESTMDZrmbk 5966 +VPTESTMDZrmk 5967 +VPTESTMDZrr 5968 +VPTESTMDZrrk 5969 +VPTESTMQZ 5970 +VPTESTMQZrm 5971 +VPTESTMQZrmb 5972 +VPTESTMQZrmbk 5973 +VPTESTMQZrmk 5974 +VPTESTMQZrr 5975 +VPTESTMQZrrk 5976 +VPTESTMWZ 5977 +VPTESTMWZrm 5978 +VPTESTMWZrmk 5979 +VPTESTMWZrr 5980 +VPTESTMWZrrk 5981 +VPTESTNMBZ 5982 +VPTESTNMBZrm 5983 +VPTESTNMBZrmk 5984 +VPTESTNMBZrr 5985 +VPTESTNMBZrrk 5986 +VPTESTNMDZ 5987 +VPTESTNMDZrm 5988 +VPTESTNMDZrmb 5989 +VPTESTNMDZrmbk 5990 +VPTESTNMDZrmk 5991 +VPTESTNMDZrr 5992 +VPTESTNMDZrrk 5993 +VPTESTNMQZ 5994 +VPTESTNMQZrm 5995 +VPTESTNMQZrmb 5996 +VPTESTNMQZrmbk 5997 +VPTESTNMQZrmk 5998 +VPTESTNMQZrr 5999 +VPTESTNMQZrrk 6000 +VPTESTNMWZ 6001 +VPTESTNMWZrm 6002 +VPTESTNMWZrmk 6003 +VPTESTNMWZrr 6004 +VPTESTNMWZrrk 6005 +VPTESTYrm 6006 +VPTESTYrr 6007 +VPTESTrm 6008 +VPTESTrr 6009 +VPUNPCKHBWYrm 6010 +VPUNPCKHBWYrr 6011 +VPUNPCKHBWZ 6012 +VPUNPCKHBWZrm 6013 +VPUNPCKHBWZrmk 6014 +VPUNPCKHBWZrmkz 6015 +VPUNPCKHBWZrr 6016 +VPUNPCKHBWZrrk 6017 +VPUNPCKHBWZrrkz 6018 +VPUNPCKHBWrm 6019 +VPUNPCKHBWrr 6020 +VPUNPCKHDQYrm 6021 +VPUNPCKHDQYrr 6022 +VPUNPCKHDQZ 6023 +VPUNPCKHDQZrm 6024 +VPUNPCKHDQZrmb 6025 +VPUNPCKHDQZrmbk 6026 +VPUNPCKHDQZrmbkz 6027 +VPUNPCKHDQZrmk 6028 +VPUNPCKHDQZrmkz 6029 +VPUNPCKHDQZrr 6030 +VPUNPCKHDQZrrk 6031 +VPUNPCKHDQZrrkz 6032 +VPUNPCKHDQrm 6033 +VPUNPCKHDQrr 6034 +VPUNPCKHQDQYrm 6035 +VPUNPCKHQDQYrr 6036 +VPUNPCKHQDQZ 6037 +VPUNPCKHQDQZrm 6038 +VPUNPCKHQDQZrmb 6039 +VPUNPCKHQDQZrmbk 6040 +VPUNPCKHQDQZrmbkz 6041 +VPUNPCKHQDQZrmk 6042 +VPUNPCKHQDQZrmkz 6043 +VPUNPCKHQDQZrr 6044 +VPUNPCKHQDQZrrk 6045 +VPUNPCKHQDQZrrkz 6046 +VPUNPCKHQDQrm 6047 +VPUNPCKHQDQrr 6048 +VPUNPCKHWDYrm 6049 +VPUNPCKHWDYrr 6050 +VPUNPCKHWDZ 6051 +VPUNPCKHWDZrm 6052 +VPUNPCKHWDZrmk 6053 +VPUNPCKHWDZrmkz 6054 +VPUNPCKHWDZrr 6055 +VPUNPCKHWDZrrk 6056 +VPUNPCKHWDZrrkz 6057 +VPUNPCKHWDrm 6058 +VPUNPCKHWDrr 6059 +VPUNPCKLBWYrm 6060 +VPUNPCKLBWYrr 6061 +VPUNPCKLBWZ 6062 +VPUNPCKLBWZrm 6063 +VPUNPCKLBWZrmk 6064 +VPUNPCKLBWZrmkz 6065 +VPUNPCKLBWZrr 6066 +VPUNPCKLBWZrrk 6067 +VPUNPCKLBWZrrkz 6068 +VPUNPCKLBWrm 6069 +VPUNPCKLBWrr 6070 +VPUNPCKLDQYrm 6071 +VPUNPCKLDQYrr 6072 +VPUNPCKLDQZ 6073 +VPUNPCKLDQZrm 6074 +VPUNPCKLDQZrmb 6075 +VPUNPCKLDQZrmbk 6076 +VPUNPCKLDQZrmbkz 6077 +VPUNPCKLDQZrmk 6078 +VPUNPCKLDQZrmkz 6079 +VPUNPCKLDQZrr 6080 +VPUNPCKLDQZrrk 6081 +VPUNPCKLDQZrrkz 6082 +VPUNPCKLDQrm 6083 +VPUNPCKLDQrr 
6084 +VPUNPCKLQDQYrm 6085 +VPUNPCKLQDQYrr 6086 +VPUNPCKLQDQZ 6087 +VPUNPCKLQDQZrm 6088 +VPUNPCKLQDQZrmb 6089 +VPUNPCKLQDQZrmbk 6090 +VPUNPCKLQDQZrmbkz 6091 +VPUNPCKLQDQZrmk 6092 +VPUNPCKLQDQZrmkz 6093 +VPUNPCKLQDQZrr 6094 +VPUNPCKLQDQZrrk 6095 +VPUNPCKLQDQZrrkz 6096 +VPUNPCKLQDQrm 6097 +VPUNPCKLQDQrr 6098 +VPUNPCKLWDYrm 6099 +VPUNPCKLWDYrr 6100 +VPUNPCKLWDZ 6101 +VPUNPCKLWDZrm 6102 +VPUNPCKLWDZrmk 6103 +VPUNPCKLWDZrmkz 6104 +VPUNPCKLWDZrr 6105 +VPUNPCKLWDZrrk 6106 +VPUNPCKLWDZrrkz 6107 +VPUNPCKLWDrm 6108 +VPUNPCKLWDrr 6109 +VPXORDZ 6110 +VPXORDZrm 6111 +VPXORDZrmb 6112 +VPXORDZrmbk 6113 +VPXORDZrmbkz 6114 +VPXORDZrmk 6115 +VPXORDZrmkz 6116 +VPXORDZrr 6117 +VPXORDZrrk 6118 +VPXORDZrrkz 6119 +VPXORQZ 6120 +VPXORQZrm 6121 +VPXORQZrmb 6122 +VPXORQZrmbk 6123 +VPXORQZrmbkz 6124 +VPXORQZrmk 6125 +VPXORQZrmkz 6126 +VPXORQZrr 6127 +VPXORQZrrk 6128 +VPXORQZrrkz 6129 +VPXORYrm 6130 +VPXORYrr 6131 +VPXORrm 6132 +VPXORrr 6133 +VRANGEPDZ 6134 +VRANGEPDZrmbi 6135 +VRANGEPDZrmbik 6136 +VRANGEPDZrmbikz 6137 +VRANGEPDZrmi 6138 +VRANGEPDZrmik 6139 +VRANGEPDZrmikz 6140 +VRANGEPDZrri 6141 +VRANGEPDZrrib 6142 +VRANGEPDZrribk 6143 +VRANGEPDZrribkz 6144 +VRANGEPDZrrik 6145 +VRANGEPDZrrikz 6146 +VRANGEPSZ 6147 +VRANGEPSZrmbi 6148 +VRANGEPSZrmbik 6149 +VRANGEPSZrmbikz 6150 +VRANGEPSZrmi 6151 +VRANGEPSZrmik 6152 +VRANGEPSZrmikz 6153 +VRANGEPSZrri 6154 +VRANGEPSZrrib 6155 +VRANGEPSZrribk 6156 +VRANGEPSZrribkz 6157 +VRANGEPSZrrik 6158 +VRANGEPSZrrikz 6159 +VRANGESDZrmi 6160 +VRANGESDZrmik 6161 +VRANGESDZrmikz 6162 +VRANGESDZrri 6163 +VRANGESDZrrib 6164 +VRANGESDZrribk 6165 +VRANGESDZrribkz 6166 +VRANGESDZrrik 6167 +VRANGESDZrrikz 6168 +VRANGESSZrmi 6169 +VRANGESSZrmik 6170 +VRANGESSZrmikz 6171 +VRANGESSZrri 6172 +VRANGESSZrrib 6173 +VRANGESSZrribk 6174 +VRANGESSZrribkz 6175 +VRANGESSZrrik 6176 +VRANGESSZrrikz 6177 +VRCP 6178 +VRCPBF 6179 +VRCPPHZ 6180 +VRCPPHZm 6181 +VRCPPHZmb 6182 +VRCPPHZmbk 6183 +VRCPPHZmbkz 6184 +VRCPPHZmk 6185 +VRCPPHZmkz 6186 +VRCPPHZr 6187 +VRCPPHZrk 6188 +VRCPPHZrkz 6189 +VRCPPSYm 6190 +VRCPPSYr 6191 +VRCPPSm 6192 +VRCPPSr 6193 +VRCPSHZrm 6194 +VRCPSHZrmk 6195 +VRCPSHZrmkz 6196 +VRCPSHZrr 6197 +VRCPSHZrrk 6198 +VRCPSHZrrkz 6199 +VRCPSSm 6200 +VRCPSSm_Int 6201 +VRCPSSr 6202 +VRCPSSr_Int 6203 +VREDUCEBF 6204 +VREDUCEPDZ 6205 +VREDUCEPDZrmbi 6206 +VREDUCEPDZrmbik 6207 +VREDUCEPDZrmbikz 6208 +VREDUCEPDZrmi 6209 +VREDUCEPDZrmik 6210 +VREDUCEPDZrmikz 6211 +VREDUCEPDZrri 6212 +VREDUCEPDZrrib 6213 +VREDUCEPDZrribk 6214 +VREDUCEPDZrribkz 6215 +VREDUCEPDZrrik 6216 +VREDUCEPDZrrikz 6217 +VREDUCEPHZ 6218 +VREDUCEPHZrmbi 6219 +VREDUCEPHZrmbik 6220 +VREDUCEPHZrmbikz 6221 +VREDUCEPHZrmi 6222 +VREDUCEPHZrmik 6223 +VREDUCEPHZrmikz 6224 +VREDUCEPHZrri 6225 +VREDUCEPHZrrib 6226 +VREDUCEPHZrribk 6227 +VREDUCEPHZrribkz 6228 +VREDUCEPHZrrik 6229 +VREDUCEPHZrrikz 6230 +VREDUCEPSZ 6231 +VREDUCEPSZrmbi 6232 +VREDUCEPSZrmbik 6233 +VREDUCEPSZrmbikz 6234 +VREDUCEPSZrmi 6235 +VREDUCEPSZrmik 6236 +VREDUCEPSZrmikz 6237 +VREDUCEPSZrri 6238 +VREDUCEPSZrrib 6239 +VREDUCEPSZrribk 6240 +VREDUCEPSZrribkz 6241 +VREDUCEPSZrrik 6242 +VREDUCEPSZrrikz 6243 +VREDUCESDZrmi 6244 +VREDUCESDZrmik 6245 +VREDUCESDZrmikz 6246 +VREDUCESDZrri 6247 +VREDUCESDZrrib 6248 +VREDUCESDZrribk 6249 +VREDUCESDZrribkz 6250 +VREDUCESDZrrik 6251 +VREDUCESDZrrikz 6252 +VREDUCESHZrmi 6253 +VREDUCESHZrmik 6254 +VREDUCESHZrmikz 6255 +VREDUCESHZrri 6256 +VREDUCESHZrrib 6257 +VREDUCESHZrribk 6258 +VREDUCESHZrribkz 6259 +VREDUCESHZrrik 6260 +VREDUCESHZrrikz 6261 +VREDUCESSZrmi 6262 +VREDUCESSZrmik 6263 +VREDUCESSZrmikz 6264 +VREDUCESSZrri 6265 +VREDUCESSZrrib 6266 
+VREDUCESSZrribk 6267 +VREDUCESSZrribkz 6268 +VREDUCESSZrrik 6269 +VREDUCESSZrrikz 6270 +VRNDSCALEBF 6271 +VRNDSCALEPDZ 6272 +VRNDSCALEPDZrmbi 6273 +VRNDSCALEPDZrmbik 6274 +VRNDSCALEPDZrmbikz 6275 +VRNDSCALEPDZrmi 6276 +VRNDSCALEPDZrmik 6277 +VRNDSCALEPDZrmikz 6278 +VRNDSCALEPDZrri 6279 +VRNDSCALEPDZrrib 6280 +VRNDSCALEPDZrribk 6281 +VRNDSCALEPDZrribkz 6282 +VRNDSCALEPDZrrik 6283 +VRNDSCALEPDZrrikz 6284 +VRNDSCALEPHZ 6285 +VRNDSCALEPHZrmbi 6286 +VRNDSCALEPHZrmbik 6287 +VRNDSCALEPHZrmbikz 6288 +VRNDSCALEPHZrmi 6289 +VRNDSCALEPHZrmik 6290 +VRNDSCALEPHZrmikz 6291 +VRNDSCALEPHZrri 6292 +VRNDSCALEPHZrrib 6293 +VRNDSCALEPHZrribk 6294 +VRNDSCALEPHZrribkz 6295 +VRNDSCALEPHZrrik 6296 +VRNDSCALEPHZrrikz 6297 +VRNDSCALEPSZ 6298 +VRNDSCALEPSZrmbi 6299 +VRNDSCALEPSZrmbik 6300 +VRNDSCALEPSZrmbikz 6301 +VRNDSCALEPSZrmi 6302 +VRNDSCALEPSZrmik 6303 +VRNDSCALEPSZrmikz 6304 +VRNDSCALEPSZrri 6305 +VRNDSCALEPSZrrib 6306 +VRNDSCALEPSZrribk 6307 +VRNDSCALEPSZrribkz 6308 +VRNDSCALEPSZrrik 6309 +VRNDSCALEPSZrrikz 6310 +VRNDSCALESDZrmi 6311 +VRNDSCALESDZrmi_Int 6312 +VRNDSCALESDZrmik_Int 6313 +VRNDSCALESDZrmikz_Int 6314 +VRNDSCALESDZrri 6315 +VRNDSCALESDZrri_Int 6316 +VRNDSCALESDZrrib_Int 6317 +VRNDSCALESDZrribk_Int 6318 +VRNDSCALESDZrribkz_Int 6319 +VRNDSCALESDZrrik_Int 6320 +VRNDSCALESDZrrikz_Int 6321 +VRNDSCALESHZrmi 6322 +VRNDSCALESHZrmi_Int 6323 +VRNDSCALESHZrmik_Int 6324 +VRNDSCALESHZrmikz_Int 6325 +VRNDSCALESHZrri 6326 +VRNDSCALESHZrri_Int 6327 +VRNDSCALESHZrrib_Int 6328 +VRNDSCALESHZrribk_Int 6329 +VRNDSCALESHZrribkz_Int 6330 +VRNDSCALESHZrrik_Int 6331 +VRNDSCALESHZrrikz_Int 6332 +VRNDSCALESSZrmi 6333 +VRNDSCALESSZrmi_Int 6334 +VRNDSCALESSZrmik_Int 6335 +VRNDSCALESSZrmikz_Int 6336 +VRNDSCALESSZrri 6337 +VRNDSCALESSZrri_Int 6338 +VRNDSCALESSZrrib_Int 6339 +VRNDSCALESSZrribk_Int 6340 +VRNDSCALESSZrribkz_Int 6341 +VRNDSCALESSZrrik_Int 6342 +VRNDSCALESSZrrikz_Int 6343 +VROUNDPDYmi 6344 +VROUNDPDYri 6345 +VROUNDPDmi 6346 +VROUNDPDri 6347 +VROUNDPSYmi 6348 +VROUNDPSYri 6349 +VROUNDPSmi 6350 +VROUNDPSri 6351 +VROUNDSDmi 6352 +VROUNDSDmi_Int 6353 +VROUNDSDri 6354 +VROUNDSDri_Int 6355 +VROUNDSSmi 6356 +VROUNDSSmi_Int 6357 +VROUNDSSri 6358 +VROUNDSSri_Int 6359 +VRSQRT 6360 +VRSQRTBF 6361 +VRSQRTPHZ 6362 +VRSQRTPHZm 6363 +VRSQRTPHZmb 6364 +VRSQRTPHZmbk 6365 +VRSQRTPHZmbkz 6366 +VRSQRTPHZmk 6367 +VRSQRTPHZmkz 6368 +VRSQRTPHZr 6369 +VRSQRTPHZrk 6370 +VRSQRTPHZrkz 6371 +VRSQRTPSYm 6372 +VRSQRTPSYr 6373 +VRSQRTPSm 6374 +VRSQRTPSr 6375 +VRSQRTSHZrm 6376 +VRSQRTSHZrmk 6377 +VRSQRTSHZrmkz 6378 +VRSQRTSHZrr 6379 +VRSQRTSHZrrk 6380 +VRSQRTSHZrrkz 6381 +VRSQRTSSm 6382 +VRSQRTSSm_Int 6383 +VRSQRTSSr 6384 +VRSQRTSSr_Int 6385 +VSCALEFBF 6386 +VSCALEFPDZ 6387 +VSCALEFPDZrm 6388 +VSCALEFPDZrmb 6389 +VSCALEFPDZrmbk 6390 +VSCALEFPDZrmbkz 6391 +VSCALEFPDZrmk 6392 +VSCALEFPDZrmkz 6393 +VSCALEFPDZrr 6394 +VSCALEFPDZrrb 6395 +VSCALEFPDZrrbk 6396 +VSCALEFPDZrrbkz 6397 +VSCALEFPDZrrk 6398 +VSCALEFPDZrrkz 6399 +VSCALEFPHZ 6400 +VSCALEFPHZrm 6401 +VSCALEFPHZrmb 6402 +VSCALEFPHZrmbk 6403 +VSCALEFPHZrmbkz 6404 +VSCALEFPHZrmk 6405 +VSCALEFPHZrmkz 6406 +VSCALEFPHZrr 6407 +VSCALEFPHZrrb 6408 +VSCALEFPHZrrbk 6409 +VSCALEFPHZrrbkz 6410 +VSCALEFPHZrrk 6411 +VSCALEFPHZrrkz 6412 +VSCALEFPSZ 6413 +VSCALEFPSZrm 6414 +VSCALEFPSZrmb 6415 +VSCALEFPSZrmbk 6416 +VSCALEFPSZrmbkz 6417 +VSCALEFPSZrmk 6418 +VSCALEFPSZrmkz 6419 +VSCALEFPSZrr 6420 +VSCALEFPSZrrb 6421 +VSCALEFPSZrrbk 6422 +VSCALEFPSZrrbkz 6423 +VSCALEFPSZrrk 6424 +VSCALEFPSZrrkz 6425 +VSCALEFSDZrm 6426 +VSCALEFSDZrmk 6427 +VSCALEFSDZrmkz 6428 +VSCALEFSDZrr 6429 +VSCALEFSDZrrb_Int 6430 
+VSCALEFSDZrrbk_Int 6431 +VSCALEFSDZrrbkz_Int 6432 +VSCALEFSDZrrk 6433 +VSCALEFSDZrrkz 6434 +VSCALEFSHZrm 6435 +VSCALEFSHZrmk 6436 +VSCALEFSHZrmkz 6437 +VSCALEFSHZrr 6438 +VSCALEFSHZrrb_Int 6439 +VSCALEFSHZrrbk_Int 6440 +VSCALEFSHZrrbkz_Int 6441 +VSCALEFSHZrrk 6442 +VSCALEFSHZrrkz 6443 +VSCALEFSSZrm 6444 +VSCALEFSSZrmk 6445 +VSCALEFSSZrmkz 6446 +VSCALEFSSZrr 6447 +VSCALEFSSZrrb_Int 6448 +VSCALEFSSZrrbk_Int 6449 +VSCALEFSSZrrbkz_Int 6450 +VSCALEFSSZrrk 6451 +VSCALEFSSZrrkz 6452 +VSCATTERDPDZ 6453 +VSCATTERDPDZmr 6454 +VSCATTERDPSZ 6455 +VSCATTERDPSZmr 6456 +VSCATTERPF 6457 +VSCATTERQPDZ 6458 +VSCATTERQPDZmr 6459 +VSCATTERQPSZ 6460 +VSCATTERQPSZmr 6461 +VSHA 6462 +VSHUFF 6463 +VSHUFI 6464 +VSHUFPDYrmi 6465 +VSHUFPDYrri 6466 +VSHUFPDZ 6467 +VSHUFPDZrmbi 6468 +VSHUFPDZrmbik 6469 +VSHUFPDZrmbikz 6470 +VSHUFPDZrmi 6471 +VSHUFPDZrmik 6472 +VSHUFPDZrmikz 6473 +VSHUFPDZrri 6474 +VSHUFPDZrrik 6475 +VSHUFPDZrrikz 6476 +VSHUFPDrmi 6477 +VSHUFPDrri 6478 +VSHUFPSYrmi 6479 +VSHUFPSYrri 6480 +VSHUFPSZ 6481 +VSHUFPSZrmbi 6482 +VSHUFPSZrmbik 6483 +VSHUFPSZrmbikz 6484 +VSHUFPSZrmi 6485 +VSHUFPSZrmik 6486 +VSHUFPSZrmikz 6487 +VSHUFPSZrri 6488 +VSHUFPSZrrik 6489 +VSHUFPSZrrikz 6490 +VSHUFPSrmi 6491 +VSHUFPSrri 6492 +VSM 6493 +VSQRTBF 6494 +VSQRTPDYm 6495 +VSQRTPDYr 6496 +VSQRTPDZ 6497 +VSQRTPDZm 6498 +VSQRTPDZmb 6499 +VSQRTPDZmbk 6500 +VSQRTPDZmbkz 6501 +VSQRTPDZmk 6502 +VSQRTPDZmkz 6503 +VSQRTPDZr 6504 +VSQRTPDZrb 6505 +VSQRTPDZrbk 6506 +VSQRTPDZrbkz 6507 +VSQRTPDZrk 6508 +VSQRTPDZrkz 6509 +VSQRTPDm 6510 +VSQRTPDr 6511 +VSQRTPHZ 6512 +VSQRTPHZm 6513 +VSQRTPHZmb 6514 +VSQRTPHZmbk 6515 +VSQRTPHZmbkz 6516 +VSQRTPHZmk 6517 +VSQRTPHZmkz 6518 +VSQRTPHZr 6519 +VSQRTPHZrb 6520 +VSQRTPHZrbk 6521 +VSQRTPHZrbkz 6522 +VSQRTPHZrk 6523 +VSQRTPHZrkz 6524 +VSQRTPSYm 6525 +VSQRTPSYr 6526 +VSQRTPSZ 6527 +VSQRTPSZm 6528 +VSQRTPSZmb 6529 +VSQRTPSZmbk 6530 +VSQRTPSZmbkz 6531 +VSQRTPSZmk 6532 +VSQRTPSZmkz 6533 +VSQRTPSZr 6534 +VSQRTPSZrb 6535 +VSQRTPSZrbk 6536 +VSQRTPSZrbkz 6537 +VSQRTPSZrk 6538 +VSQRTPSZrkz 6539 +VSQRTPSm 6540 +VSQRTPSr 6541 +VSQRTSDZm 6542 +VSQRTSDZm_Int 6543 +VSQRTSDZmk_Int 6544 +VSQRTSDZmkz_Int 6545 +VSQRTSDZr 6546 +VSQRTSDZr_Int 6547 +VSQRTSDZrb_Int 6548 +VSQRTSDZrbk_Int 6549 +VSQRTSDZrbkz_Int 6550 +VSQRTSDZrk_Int 6551 +VSQRTSDZrkz_Int 6552 +VSQRTSDm 6553 +VSQRTSDm_Int 6554 +VSQRTSDr 6555 +VSQRTSDr_Int 6556 +VSQRTSHZm 6557 +VSQRTSHZm_Int 6558 +VSQRTSHZmk_Int 6559 +VSQRTSHZmkz_Int 6560 +VSQRTSHZr 6561 +VSQRTSHZr_Int 6562 +VSQRTSHZrb_Int 6563 +VSQRTSHZrbk_Int 6564 +VSQRTSHZrbkz_Int 6565 +VSQRTSHZrk_Int 6566 +VSQRTSHZrkz_Int 6567 +VSQRTSSZm 6568 +VSQRTSSZm_Int 6569 +VSQRTSSZmk_Int 6570 +VSQRTSSZmkz_Int 6571 +VSQRTSSZr 6572 +VSQRTSSZr_Int 6573 +VSQRTSSZrb_Int 6574 +VSQRTSSZrbk_Int 6575 +VSQRTSSZrbkz_Int 6576 +VSQRTSSZrk_Int 6577 +VSQRTSSZrkz_Int 6578 +VSQRTSSm 6579 +VSQRTSSm_Int 6580 +VSQRTSSr 6581 +VSQRTSSr_Int 6582 +VSTMXCSR 6583 +VSUBBF 6584 +VSUBPDYrm 6585 +VSUBPDYrr 6586 +VSUBPDZ 6587 +VSUBPDZrm 6588 +VSUBPDZrmb 6589 +VSUBPDZrmbk 6590 +VSUBPDZrmbkz 6591 +VSUBPDZrmk 6592 +VSUBPDZrmkz 6593 +VSUBPDZrr 6594 +VSUBPDZrrb 6595 +VSUBPDZrrbk 6596 +VSUBPDZrrbkz 6597 +VSUBPDZrrk 6598 +VSUBPDZrrkz 6599 +VSUBPDrm 6600 +VSUBPDrr 6601 +VSUBPHZ 6602 +VSUBPHZrm 6603 +VSUBPHZrmb 6604 +VSUBPHZrmbk 6605 +VSUBPHZrmbkz 6606 +VSUBPHZrmk 6607 +VSUBPHZrmkz 6608 +VSUBPHZrr 6609 +VSUBPHZrrb 6610 +VSUBPHZrrbk 6611 +VSUBPHZrrbkz 6612 +VSUBPHZrrk 6613 +VSUBPHZrrkz 6614 +VSUBPSYrm 6615 +VSUBPSYrr 6616 +VSUBPSZ 6617 +VSUBPSZrm 6618 +VSUBPSZrmb 6619 +VSUBPSZrmbk 6620 +VSUBPSZrmbkz 6621 +VSUBPSZrmk 6622 +VSUBPSZrmkz 6623 +VSUBPSZrr 6624 
+VSUBPSZrrb 6625 +VSUBPSZrrbk 6626 +VSUBPSZrrbkz 6627 +VSUBPSZrrk 6628 +VSUBPSZrrkz 6629 +VSUBPSrm 6630 +VSUBPSrr 6631 +VSUBSDZrm 6632 +VSUBSDZrm_Int 6633 +VSUBSDZrmk_Int 6634 +VSUBSDZrmkz_Int 6635 +VSUBSDZrr 6636 +VSUBSDZrr_Int 6637 +VSUBSDZrrb_Int 6638 +VSUBSDZrrbk_Int 6639 +VSUBSDZrrbkz_Int 6640 +VSUBSDZrrk_Int 6641 +VSUBSDZrrkz_Int 6642 +VSUBSDrm 6643 +VSUBSDrm_Int 6644 +VSUBSDrr 6645 +VSUBSDrr_Int 6646 +VSUBSHZrm 6647 +VSUBSHZrm_Int 6648 +VSUBSHZrmk_Int 6649 +VSUBSHZrmkz_Int 6650 +VSUBSHZrr 6651 +VSUBSHZrr_Int 6652 +VSUBSHZrrb_Int 6653 +VSUBSHZrrbk_Int 6654 +VSUBSHZrrbkz_Int 6655 +VSUBSHZrrk_Int 6656 +VSUBSHZrrkz_Int 6657 +VSUBSSZrm 6658 +VSUBSSZrm_Int 6659 +VSUBSSZrmk_Int 6660 +VSUBSSZrmkz_Int 6661 +VSUBSSZrr 6662 +VSUBSSZrr_Int 6663 +VSUBSSZrrb_Int 6664 +VSUBSSZrrbk_Int 6665 +VSUBSSZrrbkz_Int 6666 +VSUBSSZrrk_Int 6667 +VSUBSSZrrkz_Int 6668 +VSUBSSrm 6669 +VSUBSSrm_Int 6670 +VSUBSSrr 6671 +VSUBSSrr_Int 6672 +VTESTPDYrm 6673 +VTESTPDYrr 6674 +VTESTPDrm 6675 +VTESTPDrr 6676 +VTESTPSYrm 6677 +VTESTPSYrr 6678 +VTESTPSrm 6679 +VTESTPSrr 6680 +VUCOMISDZrm 6681 +VUCOMISDZrm_Int 6682 +VUCOMISDZrr 6683 +VUCOMISDZrr_Int 6684 +VUCOMISDZrrb 6685 +VUCOMISDrm 6686 +VUCOMISDrm_Int 6687 +VUCOMISDrr 6688 +VUCOMISDrr_Int 6689 +VUCOMISHZrm 6690 +VUCOMISHZrm_Int 6691 +VUCOMISHZrr 6692 +VUCOMISHZrr_Int 6693 +VUCOMISHZrrb 6694 +VUCOMISSZrm 6695 +VUCOMISSZrm_Int 6696 +VUCOMISSZrr 6697 +VUCOMISSZrr_Int 6698 +VUCOMISSZrrb 6699 +VUCOMISSrm 6700 +VUCOMISSrm_Int 6701 +VUCOMISSrr 6702 +VUCOMISSrr_Int 6703 +VUCOMXSDZrm 6704 +VUCOMXSDZrm_Int 6705 +VUCOMXSDZrr 6706 +VUCOMXSDZrr_Int 6707 +VUCOMXSDZrrb_Int 6708 +VUCOMXSHZrm 6709 +VUCOMXSHZrm_Int 6710 +VUCOMXSHZrr 6711 +VUCOMXSHZrr_Int 6712 +VUCOMXSHZrrb_Int 6713 +VUCOMXSSZrm 6714 +VUCOMXSSZrm_Int 6715 +VUCOMXSSZrr 6716 +VUCOMXSSZrr_Int 6717 +VUCOMXSSZrrb_Int 6718 +VUNPCKHPDYrm 6719 +VUNPCKHPDYrr 6720 +VUNPCKHPDZ 6721 +VUNPCKHPDZrm 6722 +VUNPCKHPDZrmb 6723 +VUNPCKHPDZrmbk 6724 +VUNPCKHPDZrmbkz 6725 +VUNPCKHPDZrmk 6726 +VUNPCKHPDZrmkz 6727 +VUNPCKHPDZrr 6728 +VUNPCKHPDZrrk 6729 +VUNPCKHPDZrrkz 6730 +VUNPCKHPDrm 6731 +VUNPCKHPDrr 6732 +VUNPCKHPSYrm 6733 +VUNPCKHPSYrr 6734 +VUNPCKHPSZ 6735 +VUNPCKHPSZrm 6736 +VUNPCKHPSZrmb 6737 +VUNPCKHPSZrmbk 6738 +VUNPCKHPSZrmbkz 6739 +VUNPCKHPSZrmk 6740 +VUNPCKHPSZrmkz 6741 +VUNPCKHPSZrr 6742 +VUNPCKHPSZrrk 6743 +VUNPCKHPSZrrkz 6744 +VUNPCKHPSrm 6745 +VUNPCKHPSrr 6746 +VUNPCKLPDYrm 6747 +VUNPCKLPDYrr 6748 +VUNPCKLPDZ 6749 +VUNPCKLPDZrm 6750 +VUNPCKLPDZrmb 6751 +VUNPCKLPDZrmbk 6752 +VUNPCKLPDZrmbkz 6753 +VUNPCKLPDZrmk 6754 +VUNPCKLPDZrmkz 6755 +VUNPCKLPDZrr 6756 +VUNPCKLPDZrrk 6757 +VUNPCKLPDZrrkz 6758 +VUNPCKLPDrm 6759 +VUNPCKLPDrr 6760 +VUNPCKLPSYrm 6761 +VUNPCKLPSYrr 6762 +VUNPCKLPSZ 6763 +VUNPCKLPSZrm 6764 +VUNPCKLPSZrmb 6765 +VUNPCKLPSZrmbk 6766 +VUNPCKLPSZrmbkz 6767 +VUNPCKLPSZrmk 6768 +VUNPCKLPSZrmkz 6769 +VUNPCKLPSZrr 6770 +VUNPCKLPSZrrk 6771 +VUNPCKLPSZrrkz 6772 +VUNPCKLPSrm 6773 +VUNPCKLPSrr 6774 +VXORPDYrm 6775 +VXORPDYrr 6776 +VXORPDZ 6777 +VXORPDZrm 6778 +VXORPDZrmb 6779 +VXORPDZrmbk 6780 +VXORPDZrmbkz 6781 +VXORPDZrmk 6782 +VXORPDZrmkz 6783 +VXORPDZrr 6784 +VXORPDZrrk 6785 +VXORPDZrrkz 6786 +VXORPDrm 6787 +VXORPDrr 6788 +VXORPSYrm 6789 +VXORPSYrr 6790 +VXORPSZ 6791 +VXORPSZrm 6792 +VXORPSZrmb 6793 +VXORPSZrmbk 6794 +VXORPSZrmbkz 6795 +VXORPSZrmk 6796 +VXORPSZrmkz 6797 +VXORPSZrr 6798 +VXORPSZrrk 6799 +VXORPSZrrkz 6800 +VXORPSrm 6801 +VXORPSrr 6802 +VZEROALL 6803 +VZEROUPPER 6804 +V_SET 6805 +V_SETALLONES 6806 +WAIT 6807 +WBINVD 6808 +WBNOINVD 6809 +WRFLAGS 6810 +WRFSBASE 6811 +WRGSBASE 6812 +WRMSR 6813 +WRMSRLIST 6814 
+WRMSRNS 6815 +WRMSRNSir 6816 +WRMSRNSir_EVEX 6817 +WRPKRUr 6818 +WRSSD 6819 +WRSSD_EVEX 6820 +WRSSQ 6821 +WRSSQ_EVEX 6822 +WRUSSD 6823 +WRUSSD_EVEX 6824 +WRUSSQ 6825 +WRUSSQ_EVEX 6826 +XABORT 6827 +XABORT_DEF 6828 +XACQUIRE_PREFIX 6829 +XADD 6830 +XAM_F 6831 +XAM_Fp 6832 +XBEGIN 6833 +XCHG 6834 +XCH_F 6835 +XCRYPTCBC 6836 +XCRYPTCFB 6837 +XCRYPTCTR 6838 +XCRYPTECB 6839 +XCRYPTOFB 6840 +XEND 6841 +XGETBV 6842 +XLAT 6843 +XOR 6844 +XORPDrm 6845 +XORPDrr 6846 +XORPSrm 6847 +XORPSrr 6848 +XRELEASE_PREFIX 6849 +XRESLDTRK 6850 +XRSTOR 6851 +XRSTORS 6852 +XSAVE 6853 +XSAVEC 6854 +XSAVEOPT 6855 +XSAVES 6856 +XSETBV 6857 +XSHA 6858 +XSTORE 6859 +XSUSLDTRK 6860 +XTEST 6861 +Immediate 6862 +CImmediate 6863 +FPImmediate 6864 +MBB 6865 +FrameIndex 6866 +ConstantPoolIndex 6867 +TargetIndex 6868 +JumpTableIndex 6869 +ExternalSymbol 6870 +GlobalAddress 6871 +BlockAddress 6872 +RegisterMask 6873 +RegisterLiveOut 6874 +Metadata 6875 +MCSymbol 6876 +CFIIndex 6877 +IntrinsicID 6878 +Predicate 6879 +ShuffleMask 6880 +PhyReg_GR8 6881 +PhyReg_GRH8 6882 +PhyReg_GR8_NOREX2 6883 +PhyReg_GR8_NOREX 6884 +PhyReg_GR8_ABCD_H 6885 +PhyReg_GR8_ABCD_L 6886 +PhyReg_GRH16 6887 +PhyReg_GR16 6888 +PhyReg_GR16_NOREX2 6889 +PhyReg_GR16_NOREX 6890 +PhyReg_VK1 6891 +PhyReg_VK16 6892 +PhyReg_VK2 6893 +PhyReg_VK4 6894 +PhyReg_VK8 6895 +PhyReg_VK16WM 6896 +PhyReg_VK1WM 6897 +PhyReg_VK2WM 6898 +PhyReg_VK4WM 6899 +PhyReg_VK8WM 6900 +PhyReg_SEGMENT_REG 6901 +PhyReg_GR16_ABCD 6902 +PhyReg_FPCCR 6903 +PhyReg_FR16X 6904 +PhyReg_FR16 6905 +PhyReg_VK16PAIR 6906 +PhyReg_VK1PAIR 6907 +PhyReg_VK2PAIR 6908 +PhyReg_VK4PAIR 6909 +PhyReg_VK8PAIR 6910 +PhyReg_VK1PAIR_with_sub_mask_0_in_VK1WM 6911 +PhyReg_LOW32_ADDR_ACCESS_RBP 6912 +PhyReg_LOW32_ADDR_ACCESS 6913 +PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit 6914 +PhyReg_FR32X 6915 +PhyReg_GR32 6916 +PhyReg_GR32_NOSP 6917 +PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX2 6918 +PhyReg_DEBUG_REG 6919 +PhyReg_FR32 6920 +PhyReg_GR32_NOREX2 6921 +PhyReg_GR32_NOREX2_NOSP 6922 +PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX 6923 +PhyReg_GR32_NOREX 6924 +PhyReg_VK32 6925 +PhyReg_GR32_NOREX_NOSP 6926 +PhyReg_RFP32 6927 +PhyReg_VK32WM 6928 +PhyReg_GR32_ABCD 6929 +PhyReg_GR32_TC 6930 +PhyReg_GR32_ABCD_and_GR32_TC 6931 +PhyReg_GR32_AD 6932 +PhyReg_GR32_ArgRef 6933 +PhyReg_GR32_BPSP 6934 +PhyReg_GR32_BSI 6935 +PhyReg_GR32_CB 6936 +PhyReg_GR32_DC 6937 +PhyReg_GR32_DIBP 6938 +PhyReg_GR32_SIDI 6939 +PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_32bit 6940 +PhyReg_CCR 6941 +PhyReg_DFCCR 6942 +PhyReg_GR32_ABCD_and_GR32_BSI 6943 +PhyReg_GR32_AD_and_GR32_ArgRef 6944 +PhyReg_GR32_ArgRef_and_GR32_CB 6945 +PhyReg_GR32_BPSP_and_GR32_DIBP 6946 +PhyReg_GR32_BPSP_and_GR32_TC 6947 +PhyReg_GR32_BSI_and_GR32_SIDI 6948 +PhyReg_GR32_DIBP_and_GR32_SIDI 6949 +PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit 6950 +PhyReg_LOW32_ADDR_ACCESS_with_sub_32bit 6951 +PhyReg_RFP64 6952 +PhyReg_GR64 6953 +PhyReg_FR64X 6954 +PhyReg_GR64_with_sub_8bit 6955 +PhyReg_GR64_NOSP 6956 +PhyReg_GR64_NOREX2 6957 +PhyReg_CONTROL_REG 6958 +PhyReg_FR64 6959 +PhyReg_GR64_with_sub_16bit_in_GR16_NOREX2 6960 +PhyReg_GR64_NOREX2_NOSP 6961 +PhyReg_GR64PLTSafe 6962 +PhyReg_GR64_TC 6963 +PhyReg_GR64_NOREX 6964 +PhyReg_GR64_TCW64 6965 +PhyReg_GR64_TC_with_sub_8bit 6966 +PhyReg_GR64_NOREX2_NOSP_and_GR64_TC 6967 +PhyReg_GR64_TCW64_with_sub_8bit 6968 +PhyReg_GR64_TC_and_GR64_TCW64 6969 +PhyReg_GR64_with_sub_16bit_in_GR16_NOREX 6970 +PhyReg_VK64 6971 +PhyReg_VR64 6972 +PhyReg_GR64PLTSafe_and_GR64_TC 6973 +PhyReg_GR64_NOREX2_NOSP_and_GR64_TCW64 6974 
+PhyReg_GR64_NOREX_NOSP 6975 +PhyReg_GR64_NOREX_and_GR64_TC 6976 +PhyReg_GR64_TCW64_and_GR64_TC_with_sub_8bit 6977 +PhyReg_VK64WM 6978 +PhyReg_GR64_TC_and_GR64_NOREX2_NOSP_and_GR64_TCW64 6979 +PhyReg_GR64_TC_and_GR64_with_sub_16bit_in_GR16_NOREX 6980 +PhyReg_GR64PLTSafe_and_GR64_TCW64 6981 +PhyReg_GR64_NOREX_and_GR64PLTSafe_and_GR64_TC 6982 +PhyReg_GR64_NOREX_and_GR64_TCW64 6983 +PhyReg_GR64_ABCD 6984 +PhyReg_GR64_with_sub_32bit_in_GR32_TC 6985 +PhyReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_TC 6986 +PhyReg_GR64_AD 6987 +PhyReg_GR64_ArgRef 6988 +PhyReg_GR64_and_LOW32_ADDR_ACCESS_RBP 6989 +PhyReg_GR64_with_sub_32bit_in_GR32_ArgRef 6990 +PhyReg_GR64_with_sub_32bit_in_GR32_BPSP 6991 +PhyReg_GR64_with_sub_32bit_in_GR32_BSI 6992 +PhyReg_GR64_with_sub_32bit_in_GR32_CB 6993 +PhyReg_GR64_with_sub_32bit_in_GR32_DIBP 6994 +PhyReg_GR64_with_sub_32bit_in_GR32_SIDI 6995 +PhyReg_GR64_A 6996 +PhyReg_GR64_ArgRef_and_GR64_TC 6997 +PhyReg_GR64_and_LOW32_ADDR_ACCESS 6998 +PhyReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_BSI 6999 +PhyReg_GR64_with_sub_32bit_in_GR32_AD_and_GR32_ArgRef 7000 +PhyReg_GR64_with_sub_32bit_in_GR32_ArgRef_and_GR32_CB 7001 +PhyReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_DIBP 7002 +PhyReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_TC 7003 +PhyReg_GR64_with_sub_32bit_in_GR32_BSI_and_GR32_SIDI 7004 +PhyReg_GR64_with_sub_32bit_in_GR32_DIBP_and_GR32_SIDI 7005 +PhyReg_RST 7006 +PhyReg_RFP80 7007 +PhyReg_RFP80_7 7008 +PhyReg_VR128X 7009 +PhyReg_VR128 7010 +PhyReg_VR256X 7011 +PhyReg_VR256 7012 +PhyReg_VR512 7013 +PhyReg_VR512_0_15 7014 +PhyReg_TILE 7015 +VirtReg_GR8 7016 +VirtReg_GRH8 7017 +VirtReg_GR8_NOREX2 7018 +VirtReg_GR8_NOREX 7019 +VirtReg_GR8_ABCD_H 7020 +VirtReg_GR8_ABCD_L 7021 +VirtReg_GRH16 7022 +VirtReg_GR16 7023 +VirtReg_GR16_NOREX2 7024 +VirtReg_GR16_NOREX 7025 +VirtReg_VK1 7026 +VirtReg_VK16 7027 +VirtReg_VK2 7028 +VirtReg_VK4 7029 +VirtReg_VK8 7030 +VirtReg_VK16WM 7031 +VirtReg_VK1WM 7032 +VirtReg_VK2WM 7033 +VirtReg_VK4WM 7034 +VirtReg_VK8WM 7035 +VirtReg_SEGMENT_REG 7036 +VirtReg_GR16_ABCD 7037 +VirtReg_FPCCR 7038 +VirtReg_FR16X 7039 +VirtReg_FR16 7040 +VirtReg_VK16PAIR 7041 +VirtReg_VK1PAIR 7042 +VirtReg_VK2PAIR 7043 +VirtReg_VK4PAIR 7044 +VirtReg_VK8PAIR 7045 +VirtReg_VK1PAIR_with_sub_mask_0_in_VK1WM 7046 +VirtReg_LOW32_ADDR_ACCESS_RBP 7047 +VirtReg_LOW32_ADDR_ACCESS 7048 +VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit 7049 +VirtReg_FR32X 7050 +VirtReg_GR32 7051 +VirtReg_GR32_NOSP 7052 +VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX2 7053 +VirtReg_DEBUG_REG 7054 +VirtReg_FR32 7055 +VirtReg_GR32_NOREX2 7056 +VirtReg_GR32_NOREX2_NOSP 7057 +VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX 7058 +VirtReg_GR32_NOREX 7059 +VirtReg_VK32 7060 +VirtReg_GR32_NOREX_NOSP 7061 +VirtReg_RFP32 7062 +VirtReg_VK32WM 7063 +VirtReg_GR32_ABCD 7064 +VirtReg_GR32_TC 7065 +VirtReg_GR32_ABCD_and_GR32_TC 7066 +VirtReg_GR32_AD 7067 +VirtReg_GR32_ArgRef 7068 +VirtReg_GR32_BPSP 7069 +VirtReg_GR32_BSI 7070 +VirtReg_GR32_CB 7071 +VirtReg_GR32_DC 7072 +VirtReg_GR32_DIBP 7073 +VirtReg_GR32_SIDI 7074 +VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_32bit 7075 +VirtReg_CCR 7076 +VirtReg_DFCCR 7077 +VirtReg_GR32_ABCD_and_GR32_BSI 7078 +VirtReg_GR32_AD_and_GR32_ArgRef 7079 +VirtReg_GR32_ArgRef_and_GR32_CB 7080 +VirtReg_GR32_BPSP_and_GR32_DIBP 7081 +VirtReg_GR32_BPSP_and_GR32_TC 7082 +VirtReg_GR32_BSI_and_GR32_SIDI 7083 +VirtReg_GR32_DIBP_and_GR32_SIDI 7084 +VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit 7085 +VirtReg_LOW32_ADDR_ACCESS_with_sub_32bit 7086 +VirtReg_RFP64 7087 
+VirtReg_GR64 7088 +VirtReg_FR64X 7089 +VirtReg_GR64_with_sub_8bit 7090 +VirtReg_GR64_NOSP 7091 +VirtReg_GR64_NOREX2 7092 +VirtReg_CONTROL_REG 7093 +VirtReg_FR64 7094 +VirtReg_GR64_with_sub_16bit_in_GR16_NOREX2 7095 +VirtReg_GR64_NOREX2_NOSP 7096 +VirtReg_GR64PLTSafe 7097 +VirtReg_GR64_TC 7098 +VirtReg_GR64_NOREX 7099 +VirtReg_GR64_TCW64 7100 +VirtReg_GR64_TC_with_sub_8bit 7101 +VirtReg_GR64_NOREX2_NOSP_and_GR64_TC 7102 +VirtReg_GR64_TCW64_with_sub_8bit 7103 +VirtReg_GR64_TC_and_GR64_TCW64 7104 +VirtReg_GR64_with_sub_16bit_in_GR16_NOREX 7105 +VirtReg_VK64 7106 +VirtReg_VR64 7107 +VirtReg_GR64PLTSafe_and_GR64_TC 7108 +VirtReg_GR64_NOREX2_NOSP_and_GR64_TCW64 7109 +VirtReg_GR64_NOREX_NOSP 7110 +VirtReg_GR64_NOREX_and_GR64_TC 7111 +VirtReg_GR64_TCW64_and_GR64_TC_with_sub_8bit 7112 +VirtReg_VK64WM 7113 +VirtReg_GR64_TC_and_GR64_NOREX2_NOSP_and_GR64_TCW64 7114 +VirtReg_GR64_TC_and_GR64_with_sub_16bit_in_GR16_NOREX 7115 +VirtReg_GR64PLTSafe_and_GR64_TCW64 7116 +VirtReg_GR64_NOREX_and_GR64PLTSafe_and_GR64_TC 7117 +VirtReg_GR64_NOREX_and_GR64_TCW64 7118 +VirtReg_GR64_ABCD 7119 +VirtReg_GR64_with_sub_32bit_in_GR32_TC 7120 +VirtReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_TC 7121 +VirtReg_GR64_AD 7122 +VirtReg_GR64_ArgRef 7123 +VirtReg_GR64_and_LOW32_ADDR_ACCESS_RBP 7124 +VirtReg_GR64_with_sub_32bit_in_GR32_ArgRef 7125 +VirtReg_GR64_with_sub_32bit_in_GR32_BPSP 7126 +VirtReg_GR64_with_sub_32bit_in_GR32_BSI 7127 +VirtReg_GR64_with_sub_32bit_in_GR32_CB 7128 +VirtReg_GR64_with_sub_32bit_in_GR32_DIBP 7129 +VirtReg_GR64_with_sub_32bit_in_GR32_SIDI 7130 +VirtReg_GR64_A 7131 +VirtReg_GR64_ArgRef_and_GR64_TC 7132 +VirtReg_GR64_and_LOW32_ADDR_ACCESS 7133 +VirtReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_BSI 7134 +VirtReg_GR64_with_sub_32bit_in_GR32_AD_and_GR32_ArgRef 7135 +VirtReg_GR64_with_sub_32bit_in_GR32_ArgRef_and_GR32_CB 7136 +VirtReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_DIBP 7137 +VirtReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_TC 7138 +VirtReg_GR64_with_sub_32bit_in_GR32_BSI_and_GR32_SIDI 7139 +VirtReg_GR64_with_sub_32bit_in_GR32_DIBP_and_GR32_SIDI 7140 +VirtReg_RST 7141 +VirtReg_RFP80 7142 +VirtReg_RFP80_7 7143 +VirtReg_VR128X 7144 +VirtReg_VR128 7145 +VirtReg_VR256X 7146 +VirtReg_VR256 7147 +VirtReg_VR512 7148 +VirtReg_VR512_0_15 7149 +VirtReg_TILE 7150
