diff options
author | Florian Hahn <flo@fhahn.com> | 2024-01-11 16:47:37 +0000 |
---|---|---|
committer | Florian Hahn <flo@fhahn.com> | 2024-01-11 16:47:38 +0000 |
commit | 3b3da7c7fbc008fb23dd3365033e62d6d5deeb35 (patch) | |
tree | df3a788f11d0f333b14c4b488b1689f523f61c00 /llvm/test/Transforms/SLPVectorizer | |
parent | 90eb4e24551c44bee3b9c0ca33fcb6dbb7c381fe (diff) | |
download | llvm-3b3da7c7fbc008fb23dd3365033e62d6d5deeb35.zip llvm-3b3da7c7fbc008fb23dd3365033e62d6d5deeb35.tar.gz llvm-3b3da7c7fbc008fb23dd3365033e62d6d5deeb35.tar.bz2 |
[SLP] Add a set of tests with non-power-of-2 operations.
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer')
4 files changed, 1000 insertions, 0 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll new file mode 100644 index 0000000..b9e959d --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll @@ -0,0 +1,127 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=slp-vectorizer -mtriple=arm64-apple-ios -S %s | FileCheck %s + +define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) { +; CHECK-LABEL: define void @v15_load_i8_mul_by_constant_store( +; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0 +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10> +; CHECK-NEXT: store <8 x i8> [[TMP1]], ptr [[DST]], align 1 +; CHECK-NEXT: [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8 +; CHECK-NEXT: [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], <i8 10, i8 10, i8 10, i8 10> +; CHECK-NEXT: store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1 +; CHECK-NEXT: [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12 +; CHECK-NEXT: [[L_SRC_12:%.*]] = load i8, ptr [[GEP_SRC_12]], align 4 +; CHECK-NEXT: [[MUL_12:%.*]] = mul nsw i8 [[L_SRC_12]], 10 +; CHECK-NEXT: [[DST_12:%.*]] = getelementptr i8, ptr [[DST]], i8 12 +; CHECK-NEXT: store i8 [[MUL_12]], ptr [[DST_12]], align 1 +; CHECK-NEXT: [[GEP_SRC_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 13 +; CHECK-NEXT: [[L_SRC_13:%.*]] = load i8, ptr [[GEP_SRC_13]], align 4 +; CHECK-NEXT: [[MUL_13:%.*]] = mul nsw i8 [[L_SRC_13]], 10 +; CHECK-NEXT: [[DST_13:%.*]] = getelementptr i8, ptr [[DST]], i8 13 +; 
CHECK-NEXT: store i8 [[MUL_13]], ptr [[DST_13]], align 1 +; CHECK-NEXT: [[GEP_SRC_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 14 +; CHECK-NEXT: [[L_SRC_14:%.*]] = load i8, ptr [[GEP_SRC_14]], align 4 +; CHECK-NEXT: [[MUL_14:%.*]] = mul nsw i8 [[L_SRC_14]], 10 +; CHECK-NEXT: [[DST_14:%.*]] = getelementptr i8, ptr [[DST]], i8 14 +; CHECK-NEXT: store i8 [[MUL_14]], ptr [[DST_14]], align 1 +; CHECK-NEXT: ret void +; +entry: + %gep.src.0 = getelementptr inbounds i8, ptr %src, i8 0 + %l.src.0 = load i8, ptr %gep.src.0, align 4 + %mul.0 = mul nsw i8 %l.src.0, 10 + store i8 %mul.0, ptr %dst + + %gep.src.1 = getelementptr inbounds i8, ptr %src, i8 1 + %l.src.1 = load i8, ptr %gep.src.1, align 4 + %mul.1 = mul nsw i8 %l.src.1, 10 + %dst.1 = getelementptr i8, ptr %dst, i8 1 + store i8 %mul.1, ptr %dst.1 + + %gep.src.2 = getelementptr inbounds i8, ptr %src, i8 2 + %l.src.2 = load i8, ptr %gep.src.2, align 4 + %mul.2 = mul nsw i8 %l.src.2, 10 + %dst.2 = getelementptr i8, ptr %dst, i8 2 + store i8 %mul.2, ptr %dst.2 + + %gep.src.3 = getelementptr inbounds i8, ptr %src, i8 3 + %l.src.3 = load i8, ptr %gep.src.3, align 4 + %mul.3 = mul nsw i8 %l.src.3, 10 + %dst.3 = getelementptr i8, ptr %dst, i8 3 + store i8 %mul.3, ptr %dst.3 + + %gep.src.4 = getelementptr inbounds i8, ptr %src, i8 4 + %l.src.4 = load i8, ptr %gep.src.4, align 4 + %mul.4 = mul nsw i8 %l.src.4, 10 + %dst.4 = getelementptr i8, ptr %dst, i8 4 + store i8 %mul.4, ptr %dst.4 + + %gep.src.5 = getelementptr inbounds i8, ptr %src, i8 5 + %l.src.5 = load i8, ptr %gep.src.5, align 4 + %mul.5 = mul nsw i8 %l.src.5, 10 + %dst.5 = getelementptr i8, ptr %dst, i8 5 + store i8 %mul.5, ptr %dst.5 + + %gep.src.6 = getelementptr inbounds i8, ptr %src, i8 6 + %l.src.6 = load i8, ptr %gep.src.6, align 4 + %mul.6 = mul nsw i8 %l.src.6, 10 + %dst.6 = getelementptr i8, ptr %dst, i8 6 + store i8 %mul.6, ptr %dst.6 + + %gep.src.7 = getelementptr inbounds i8, ptr %src, i8 7 + %l.src.7 = load i8, ptr %gep.src.7, align 4 + %mul.7 = 
mul nsw i8 %l.src.7, 10 + %dst.7 = getelementptr i8, ptr %dst, i8 7 + store i8 %mul.7, ptr %dst.7 + + %gep.src.8 = getelementptr inbounds i8, ptr %src, i8 8 + %l.src.8 = load i8, ptr %gep.src.8, align 4 + %mul.8 = mul nsw i8 %l.src.8, 10 + %dst.8 = getelementptr i8, ptr %dst, i8 8 + store i8 %mul.8, ptr %dst.8 + + %gep.src.9 = getelementptr inbounds i8, ptr %src, i8 9 + %l.src.9 = load i8, ptr %gep.src.9, align 4 + %mul.9 = mul nsw i8 %l.src.9, 10 + %dst.9 = getelementptr i8, ptr %dst, i8 9 + store i8 %mul.9, ptr %dst.9 + + %gep.src.10 = getelementptr inbounds i8, ptr %src, i8 10 + %l.src.10 = load i8, ptr %gep.src.10, align 4 + %mul.10 = mul nsw i8 %l.src.10, 10 + %dst.10 = getelementptr i8, ptr %dst, i8 10 + store i8 %mul.10, ptr %dst.10 + + %gep.src.11 = getelementptr inbounds i8, ptr %src, i8 11 + %l.src.11 = load i8, ptr %gep.src.11, align 4 + %mul.11 = mul nsw i8 %l.src.11, 10 + %dst.11 = getelementptr i8, ptr %dst, i8 11 + store i8 %mul.11, ptr %dst.11 + + %gep.src.12 = getelementptr inbounds i8, ptr %src, i8 12 + %l.src.12 = load i8, ptr %gep.src.12, align 4 + %mul.12 = mul nsw i8 %l.src.12, 10 + %dst.12 = getelementptr i8, ptr %dst, i8 12 + store i8 %mul.12, ptr %dst.12 + + %gep.src.13 = getelementptr inbounds i8, ptr %src, i8 13 + %l.src.13 = load i8, ptr %gep.src.13, align 4 + %mul.13 = mul nsw i8 %l.src.13, 10 + %dst.13 = getelementptr i8, ptr %dst, i8 13 + store i8 %mul.13, ptr %dst.13 + + %gep.src.14 = getelementptr inbounds i8, ptr %src, i8 14 + %l.src.14 = load i8, ptr %gep.src.14, align 4 + %mul.14 = mul nsw i8 %l.src.14, 10 + %dst.14 = getelementptr i8, ptr %dst, i8 14 + store i8 %mul.14, ptr %dst.14 + + ret void +} + + diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll new file mode 100644 index 0000000..59ffbf7 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll @@ -0,0 +1,297 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py +; RUN: opt -passes=slp-vectorizer -mtriple=arm64-apple-ios -S %s | FileCheck %s + +define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) { +; CHECK-LABEL: @v3_load_i32_mul_by_constant_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2 +; CHECK-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 +; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], <i32 10, i32 10> +; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 +; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 +; CHECK-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4 +; CHECK-NEXT: ret void +; +entry: + %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0 + %l.src.0 = load i32, ptr %gep.src.0, align 4 + %mul.0 = mul nsw i32 %l.src.0, 10 + + %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1 + %l.src.1 = load i32, ptr %gep.src.1, align 4 + %mul.1 = mul nsw i32 %l.src.1, 10 + + %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2 + %l.src.2 = load i32, ptr %gep.src.2, align 4 + %mul.2 = mul nsw i32 %l.src.2, 10 + + store i32 %mul.0, ptr %dst + + %dst.1 = getelementptr i32, ptr %dst, i32 1 + store i32 %mul.1, ptr %dst.1 + + %dst.2 = getelementptr i32, ptr %dst, i32 2 + store i32 %mul.2, ptr %dst.2 + + ret void +} + +define void @v3_load_i32_mul_store(ptr %src.1, ptr %src.2, ptr %dst) { +; CHECK-LABEL: @v3_load_i32_mul_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0 +; CHECK-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0 +; CHECK-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2 +; CHECK-NEXT: [[L_SRC_1_2:%.*]] 
= load i32, ptr [[GEP_SRC_1_2]], align 4 +; CHECK-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2 +; CHECK-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4 +; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]] +; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr [[DST:%.*]], align 4 +; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 +; CHECK-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4 +; CHECK-NEXT: ret void +; +entry: + %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0 + %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4 + %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0 + %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4 + %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0 + + %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1 + %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4 + %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1 + %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4 + %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1 + + %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2 + %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4 + %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2 + %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4 + %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2 + + store i32 %mul.0, ptr %dst + + %dst.1 = getelementptr i32, ptr %dst, i32 1 + store i32 %mul.1, ptr %dst.1 + + %dst.2 = getelementptr i32, ptr %dst, i32 2 + store i32 %mul.2, ptr %dst.2 + + ret void +} + +define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) { +; CHECK-LABEL: @v3_load_i32_mul_add_const_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0 +; 
CHECK-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0 +; CHECK-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2 +; CHECK-NEXT: [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4 +; CHECK-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2 +; CHECK-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4 +; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]] +; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[MUL_2]], 9 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], <i32 9, i32 9> +; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4 +; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 +; CHECK-NEXT: store i32 [[ADD_2]], ptr [[DST_2]], align 4 +; CHECK-NEXT: ret void +; +entry: + %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0 + %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4 + %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0 + %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4 + %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0 + %add.0 = add i32 %mul.0, 9 + + %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1 + %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4 + %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1 + %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4 + %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1 + %add.1 = add i32 %mul.1, 9 + + %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2 + %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4 + %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2 + %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4 + %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2 + %add.2 = add i32 %mul.2, 9 + + store i32 %add.0, ptr %dst + + 
%dst.1 = getelementptr i32, ptr %dst, i32 1 + store i32 %add.1, ptr %dst.1 + + %dst.2 = getelementptr i32, ptr %dst, i32 2 + store i32 %add.2, ptr %dst.2 + + ret void +} + +define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { +; CHECK-LABEL: @v3_load_f32_fadd_fadd_by_constant_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2 +; CHECK-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4 +; CHECK-NEXT: [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01> +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4 +; CHECK-NEXT: [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2 +; CHECK-NEXT: store float [[FADD_2]], ptr [[DST_2]], align 4 +; CHECK-NEXT: ret void +; +entry: + %gep.src.0 = getelementptr inbounds float, ptr %src, i32 0 + %l.src.0 = load float , ptr %gep.src.0, align 4 + %fadd.0 = fadd float %l.src.0, 10.0 + + %gep.src.1 = getelementptr inbounds float , ptr %src, i32 1 + %l.src.1 = load float, ptr %gep.src.1, align 4 + %fadd.1 = fadd float %l.src.1, 10.0 + + %gep.src.2 = getelementptr inbounds float, ptr %src, i32 2 + %l.src.2 = load float, ptr %gep.src.2, align 4 + %fadd.2 = fadd float %l.src.2, 10.0 + + store float %fadd.0, ptr %dst + + %dst.1 = getelementptr float, ptr %dst, i32 1 + store float %fadd.1, ptr %dst.1 + + %dst.2 = getelementptr float, ptr %dst, i32 2 + store float %fadd.2, ptr %dst.2 + + ret void +} + +define void @phi_store3(ptr %dst) { +; CHECK-LABEL: @phi_store3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: invoke.cont8.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[P_2:%.*]] = phi i32 [ 3, [[ENTRY:%.*]] ], [ 0, 
[[INVOKE_CONT8_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[ENTRY]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT]] ] +; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 2 +; CHECK-NEXT: store <2 x i32> [[TMP0]], ptr [[DST]], align 4 +; CHECK-NEXT: store i32 [[P_2]], ptr [[DST_2]], align 4 +; CHECK-NEXT: ret void +; +entry: + br label %exit + +invoke.cont8.loopexit: ; No predecessors! + br label %exit + +exit: + %p.0 = phi i32 [ 1, %entry ], [ 0, %invoke.cont8.loopexit ] + %p.1 = phi i32 [ 2, %entry ], [ 0, %invoke.cont8.loopexit ] + %p.2 = phi i32 [ 3, %entry ], [ 0, %invoke.cont8.loopexit ] + + %dst.1 = getelementptr i32, ptr %dst, i32 1 + %dst.2 = getelementptr i32, ptr %dst, i32 2 + + store i32 %p.0, ptr %dst, align 4 + store i32 %p.1, ptr %dst.1, align 4 + store i32 %p.2, ptr %dst.2, align 4 + ret void +} + +define void @store_try_reorder(ptr %dst) { +; CHECK-LABEL: @store_try_reorder( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add i32 0, 0 +; CHECK-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1 +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4 +; CHECK-NEXT: ret void +; +entry: + %add = add i32 0, 0 + store i32 %add, ptr %dst, align 4 + %add207 = sub i32 0, 0 + %arrayidx.i1887 = getelementptr i32, ptr %dst, i64 1 + store i32 %add207, ptr %arrayidx.i1887, align 4 + %add216 = sub i32 0, 0 + %arrayidx.i1891 = getelementptr i32, ptr %dst, i64 2 + store i32 %add216, ptr %arrayidx.i1891, align 4 + ret void +} + +define void @vec3_fpext_cost(ptr %Colour, float %0) { +; CHECK-LABEL: @vec3_fpext_cost( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX80:%.*]] = getelementptr float, ptr [[COLOUR:%.*]], i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer +; 
CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double> +; CHECK-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP3]], <2 x double> zeroinitializer, <2 x double> zeroinitializer) +; CHECK-NEXT: [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float> +; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[COLOUR]], align 4 +; CHECK-NEXT: [[CONV78:%.*]] = fpext float [[TMP0]] to double +; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fmuladd.f64(double [[CONV78]], double 0.000000e+00, double 0.000000e+00) +; CHECK-NEXT: [[CONV82:%.*]] = fptrunc double [[TMP6]] to float +; CHECK-NEXT: store float [[CONV82]], ptr [[ARRAYIDX80]], align 4 +; CHECK-NEXT: ret void +; +entry: + %arrayidx72 = getelementptr float, ptr %Colour, i64 1 + %arrayidx80 = getelementptr float, ptr %Colour, i64 2 + %conv62 = fpext float %0 to double + %1 = call double @llvm.fmuladd.f64(double %conv62, double 0.000000e+00, double 0.000000e+00) + %conv66 = fptrunc double %1 to float + store float %conv66, ptr %Colour, align 4 + %conv70 = fpext float %0 to double + %2 = call double @llvm.fmuladd.f64(double %conv70, double 0.000000e+00, double 0.000000e+00) + %conv74 = fptrunc double %2 to float + store float %conv74, ptr %arrayidx72, align 4 + %conv78 = fpext float %0 to double + %3 = call double @llvm.fmuladd.f64(double %conv78, double 0.000000e+00, double 0.000000e+00) + %conv82 = fptrunc double %3 to float + store float %conv82, ptr %arrayidx80, align 4 + ret void +} + +define void @fpext_gather(ptr %dst, double %conv) { +; CHECK-LABEL: @fpext_gather( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float> +; CHECK-NEXT: [[LENGTHS:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = 
extractelement <2 x float> [[TMP2]], i32 0 +; CHECK-NEXT: store float [[TMP3]], ptr [[LENGTHS]], align 4 +; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr float, ptr [[DST]], i64 1 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[ARRAYIDX32]], align 4 +; CHECK-NEXT: ret void +; +entry: + %conv25 = fptrunc double %conv to float + %Lengths = getelementptr float, ptr %dst, i64 0 + store float %conv25, ptr %Lengths, align 4 + %arrayidx32 = getelementptr float, ptr %dst, i64 1 + store float %conv25, ptr %arrayidx32, align 4 + %conv34 = fptrunc double %conv to float + %arrayidx37 = getelementptr float, ptr %dst, i64 2 + store float %conv34, ptr %arrayidx37, align 4 + ret void +} + +declare float @llvm.fmuladd.f32(float, float, float) + +declare double @llvm.fmuladd.f64(double, double, double) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll new file mode 100644 index 0000000..2cb84ee --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=slp-vectorizer -mtriple=arm64-apple-ios -S %s | FileCheck %s + +define void @vec3_vectorize_call(ptr %Colour, float %0) { +; CHECK-LABEL: @vec3_vectorize_call( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[COLOUR:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP1]], <2 x float> zeroinitializer, <2 x float> zeroinitializer) +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[COLOUR]], align 4 +; CHECK-NEXT: [[ARRAYIDX99_I1:%.*]] = getelementptr float, ptr [[COLOUR]], i64 2 +; CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.fmuladd.f32(float [[TMP0:%.*]], float 0.000000e+00, float 0.000000e+00) +; CHECK-NEXT: store float [[TMP3]], ptr [[ARRAYIDX99_I1]], align 4 +; CHECK-NEXT: ret void +; +entry: + %1 = load float, ptr %Colour, align 4 + %2 = call float 
@llvm.fmuladd.f32(float %1, float 0.000000e+00, float 0.000000e+00) + store float %2, ptr %Colour, align 4 + %arrayidx91.i = getelementptr float, ptr %Colour, i64 1 + %3 = load float, ptr %arrayidx91.i, align 4 + %4 = call float @llvm.fmuladd.f32(float %3, float 0.000000e+00, float 0.000000e+00) + store float %4, ptr %arrayidx91.i, align 4 + %arrayidx99.i1 = getelementptr float, ptr %Colour, i64 2 + %5 = call float @llvm.fmuladd.f32(float %0, float 0.000000e+00, float 0.000000e+00) + store float %5, ptr %arrayidx99.i1, align 4 + ret void +} + +define void @vec3_fmuladd_64(ptr %Colour, double %0) { +; CHECK-LABEL: @vec3_fmuladd_64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX80:%.*]] = getelementptr float, ptr [[COLOUR:%.*]], i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP2]], <2 x double> zeroinitializer, <2 x double> zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = fptrunc <2 x double> [[TMP3]] to <2 x float> +; CHECK-NEXT: store <2 x float> [[TMP4]], ptr [[COLOUR]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fmuladd.f64(double [[TMP0]], double 0.000000e+00, double 0.000000e+00) +; CHECK-NEXT: [[CONV82:%.*]] = fptrunc double [[TMP5]] to float +; CHECK-NEXT: store float [[CONV82]], ptr [[ARRAYIDX80]], align 4 +; CHECK-NEXT: ret void +; +entry: + %arrayidx72 = getelementptr float, ptr %Colour, i64 1 + %arrayidx80 = getelementptr float, ptr %Colour, i64 2 + %1 = call double @llvm.fmuladd.f64(double %0, double 0.000000e+00, double 0.000000e+00) + %conv66 = fptrunc double %1 to float + store float %conv66, ptr %Colour, align 4 + %2 = call double @llvm.fmuladd.f64(double %0, double 0.000000e+00, double 0.000000e+00) + %conv74 = fptrunc double %2 to float + store float %conv74, ptr %arrayidx72, align 4 + %3 = 
call double @llvm.fmuladd.f64(double %0, double 0.000000e+00, double 0.000000e+00) + %conv82 = fptrunc double %3 to float + store float %conv82, ptr %arrayidx80, align 4 + ret void +} + +declare float @llvm.fmuladd.f32(float, float, float) + +declare double @llvm.fmuladd.f64(double, double, double) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll new file mode 100644 index 0000000..5707e14 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll @@ -0,0 +1,516 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=slp-vectorizer -mtriple=arm64-apple-ios -S %s | FileCheck %s + +%struct.zot = type { i32, i32, i32 } + +define i1 @reorder_results(ptr %arg, i1 %arg1, ptr %arg2, i64 %arg3, ptr %arg4) { +; CHECK-LABEL: define i1 @reorder_results( +; CHECK-SAME: ptr [[ARG:%.*]], i1 [[ARG1:%.*]], ptr [[ARG2:%.*]], i64 [[ARG3:%.*]], ptr [[ARG4:%.*]]) { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[ARG4]], align 8 +; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[LOAD]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[LOAD]], i64 4 +; CHECK-NEXT: [[LOAD5:%.*]] = load i32, ptr [[GETELEMENTPTR]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR6:%.*]] = getelementptr i8, ptr [[LOAD]], i64 8 +; CHECK-NEXT: [[LOAD7:%.*]] = load i32, ptr [[GETELEMENTPTR6]], align 4 +; CHECK-NEXT: br i1 [[ARG1]], label [[BB12:%.*]], label [[BB9:%.*]] +; CHECK: bb8: +; CHECK-NEXT: ret i1 false +; CHECK: bb9: +; CHECK-NEXT: [[FREEZE:%.*]] = freeze ptr [[ARG]] +; CHECK-NEXT: store i32 [[LOAD4]], ptr [[FREEZE]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR10:%.*]] = getelementptr i8, ptr [[FREEZE]], i64 4 +; CHECK-NEXT: store i32 [[LOAD7]], ptr [[GETELEMENTPTR10]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR11:%.*]] = getelementptr i8, ptr [[FREEZE]], i64 8 +; CHECK-NEXT: store i32 
[[LOAD5]], ptr [[GETELEMENTPTR11]], align 4 +; CHECK-NEXT: br label [[BB8:%.*]] +; CHECK: bb12: +; CHECK-NEXT: [[GETELEMENTPTR13:%.*]] = getelementptr [[STRUCT_ZOT:%.*]], ptr [[ARG2]], i64 [[ARG3]] +; CHECK-NEXT: store i32 [[LOAD4]], ptr [[GETELEMENTPTR13]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR14:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR13]], i64 4 +; CHECK-NEXT: store i32 [[LOAD7]], ptr [[GETELEMENTPTR14]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR15:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR13]], i64 8 +; CHECK-NEXT: store i32 [[LOAD5]], ptr [[GETELEMENTPTR15]], align 4 +; CHECK-NEXT: br label [[BB8]] +; +bb: + %load = load ptr, ptr %arg4, align 8 + %load4 = load i32, ptr %load, align 4 + %getelementptr = getelementptr i8, ptr %load, i64 4 + %load5 = load i32, ptr %getelementptr, align 4 + %getelementptr6 = getelementptr i8, ptr %load, i64 8 + %load7 = load i32, ptr %getelementptr6, align 4 + br i1 %arg1, label %bb12, label %bb9 + +bb8: ; preds = %bb12, %bb9 + ret i1 false + +bb9: ; preds = %bb + %freeze = freeze ptr %arg + store i32 %load4, ptr %freeze, align 4 + %getelementptr10 = getelementptr i8, ptr %freeze, i64 4 + store i32 %load7, ptr %getelementptr10, align 4 + %getelementptr11 = getelementptr i8, ptr %freeze, i64 8 + store i32 %load5, ptr %getelementptr11, align 4 + br label %bb8 + +bb12: ; preds = %bb + %getelementptr13 = getelementptr %struct.zot, ptr %arg2, i64 %arg3 + store i32 %load4, ptr %getelementptr13, align 4 + %getelementptr14 = getelementptr i8, ptr %getelementptr13, i64 4 + store i32 %load7, ptr %getelementptr14, align 4 + %getelementptr15 = getelementptr i8, ptr %getelementptr13, i64 8 + store i32 %load5, ptr %getelementptr15, align 4 + br label %bb8 +} + +define void @extract_mask(ptr %object, double %conv503, double %conv520) { +; CHECK-LABEL: define void @extract_mask( +; CHECK-SAME: ptr [[OBJECT:%.*]], double [[CONV503:%.*]], double [[CONV520:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OBJECT]], 
align 8 +; CHECK-NEXT: [[BBOX483:%.*]] = getelementptr float, ptr [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[BBOX483]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> <i32 1, i32 0> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[CONV503]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <2 x double> [[TMP4]], <double 0.000000e+00, double -2.000000e+10> +; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP5]], <2 x double> [[TMP3]], <2 x double> <double 0.000000e+00, double -2.000000e+10> +; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = fptrunc <2 x double> [[TMP7]] to <2 x float> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 +; CHECK-NEXT: [[MUL646:%.*]] = fmul float [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[CMP663:%.*]] = fcmp olt float [[MUL646]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP663]], label [[IF_THEN665:%.*]], label [[IF_END668:%.*]] +; CHECK: if.then665: +; CHECK-NEXT: [[ARRAYIDX656:%.*]] = getelementptr float, ptr [[OBJECT]], i64 10 +; CHECK-NEXT: [[BBOX651:%.*]] = getelementptr float, ptr [[OBJECT]] +; CHECK-NEXT: [[CONV621:%.*]] = fptrunc double [[CONV520]] to float +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> <i32 poison, i32 0> +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[CONV503]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = fptrunc <2 x double> [[TMP12]] to <2 x float> +; CHECK-NEXT: store <2 x float> [[TMP13]], ptr [[BBOX651]], align 8 +; CHECK-NEXT: [[BBOX_SROA_8_0_BBOX666_SROA_IDX:%.*]] = getelementptr float, ptr [[OBJECT]], i64 2 +; CHECK-NEXT: store float [[CONV621]], ptr [[BBOX_SROA_8_0_BBOX666_SROA_IDX]], align 8 +; CHECK-NEXT: store <2 x 
float> [[TMP8]], ptr [[ARRAYIDX656]], align 8
; CHECK-NEXT: br label [[IF_END668]]
; CHECK: if.end668:
; CHECK-NEXT: ret void
;
; NOTE(review): this is the tail of a function whose define line and CHECK
; header lie outside this chunk. %conv503 and %conv520 are used below but
; have no visible definitions here -- verify against the full file.
entry:
  %0 = load ptr, ptr %object, align 8
  %bbox483 = getelementptr float, ptr %0
  %1 = load float, ptr %bbox483, align 8
  %conv486 = fpext float %1 to double
  %cmp487 = fcmp ogt double %conv486, -2.000000e+10
  %conv486.2 = select i1 %cmp487, double %conv486, double -2.000000e+10
  %arrayidx502 = getelementptr float, ptr %0, i64 1
  %2 = load float, ptr %arrayidx502, align 4
  %conv5033 = fpext float %2 to double
  %cmp504 = fcmp ogt double %conv503, 0.000000e+00
  %cond514 = select i1 %cmp504, double %conv5033, double 0.000000e+00
  %sub626 = fsub double 0.000000e+00, %conv486.2
  %conv627 = fptrunc double %sub626 to float
  %sub632 = fsub double 0.000000e+00, %cond514
  %conv633 = fptrunc double %sub632 to float
  %mul646 = fmul float %conv633, %conv627
  %cmp663 = fcmp olt float %mul646, 0.000000e+00
  br i1 %cmp663, label %if.then665, label %if.end668

if.then665:                                       ; preds = %entry
  %arrayidx656 = getelementptr float, ptr %object, i64 10
  %lengths652 = getelementptr float, ptr %object, i64 11
  %bbox651 = getelementptr float, ptr %object
  %conv621 = fptrunc double %conv520 to float
  %conv617 = fptrunc double %cond514 to float
  %conv613 = fptrunc double %conv503 to float
  store float %conv613, ptr %bbox651, align 8
  %bbox.sroa.6.0.bbox666.sroa_idx = getelementptr float, ptr %object, i64 1
  store float %conv617, ptr %bbox.sroa.6.0.bbox666.sroa_idx, align 4
  %bbox.sroa.8.0.bbox666.sroa_idx = getelementptr float, ptr %object, i64 2
  store float %conv621, ptr %bbox.sroa.8.0.bbox666.sroa_idx, align 8
  store float %conv627, ptr %lengths652, align 4
  store float %conv633, ptr %arrayidx656, align 8
  br label %if.end668

if.end668:                                        ; preds = %if.then665, %entry
  ret void
}

; Three scalar fmuladd+fmul chains stored to consecutive slots of a
; [4 x [4 x float]] matrix row. The autogenerated CHECK lines expect one lane
; to stay scalar and the other two to form a <2 x float> fmuladd/fmul pair.
; CHECK lines are update_test_checks.py output -- do not edit by hand.
define void @gather_2(ptr %mat1, float %0, float %1) {
; CHECK-LABEL: define void @gather_2(
; CHECK-SAME: ptr [[MAT1:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.fmuladd.f32(float [[TMP0]], float 0.000000e+00, float 0.000000e+00)
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP0]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> zeroinitializer)
; CHECK-NEXT: [[TMP7:%.*]] = fmul float [[TMP2]], 0.000000e+00
; CHECK-NEXT: [[ARRAYIDX163:%.*]] = getelementptr [4 x [4 x float]], ptr [[MAT1]], i64 0, i64 1
; CHECK-NEXT: [[ARRAYIDX2_I_I_I278:%.*]] = getelementptr [4 x [4 x float]], ptr [[MAT1]], i64 0, i64 1, i64 1
; CHECK-NEXT: store float [[TMP7]], ptr [[ARRAYIDX163]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP6]], zeroinitializer
; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[ARRAYIDX2_I_I_I278]], align 4
; CHECK-NEXT: ret void
;
entry:
  %2 = call float @llvm.fmuladd.f32(float %0, float 0.000000e+00, float 0.000000e+00)
  %3 = call float @llvm.fmuladd.f32(float %1, float %0, float 0.000000e+00)
  %4 = call float @llvm.fmuladd.f32(float %0, float %1, float 0.000000e+00)
  %5 = fmul float %2, 0.000000e+00
  %6 = fmul float %3, 0.000000e+00
  %7 = fmul float %4, 0.000000e+00
  %arrayidx163 = getelementptr [4 x [4 x float]], ptr %mat1, i64 0, i64 1
  %arrayidx2.i.i.i278 = getelementptr [4 x [4 x float]], ptr %mat1, i64 0, i64 1, i64 1
  %arrayidx5.i.i.i280 = getelementptr [4 x [4 x float]], ptr %mat1, i64 0, i64 1, i64 2
  store float %5, ptr %arrayidx163, align 4
  store float %6, ptr %arrayidx2.i.i.i278, align 4
  store float %7, ptr %arrayidx5.i.i.i280, align 4
  ret void
}

; fneg/fmul/fmuladd chain over a 3-float alloca, stored back in place.
; Checks expect a <2 x float> vector part (with reordered shuffle indices)
; plus one scalar lane.
define i32 @reorder_indices_1(float %0) {
; CHECK-LABEL: define i32 @reorder_indices_1(
; CHECK-SAME: float [[TMP0:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NOR1:%.*]] = alloca [0 x [3 x float]], i32 0, align 4
; CHECK-NEXT: [[ARRAYIDX2_I265:%.*]] = getelementptr float, ptr [[NOR1]], i64 2
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2_I265]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[NOR1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
; CHECK-NEXT: [[NEG11_I:%.*]] = fmul float [[TMP4]], [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fmuladd.f32(float [[TMP1]], float 0.000000e+00, float [[NEG11_I]])
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP8:%.*]] = fneg <2 x float> [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = fmul <2 x float> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP13:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP2]], <2 x float> zeroinitializer, <2 x float> [[TMP12]])
; CHECK-NEXT: [[TMP14:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP10]], <2 x float> [[TMP13]], <2 x float> zeroinitializer)
; CHECK-NEXT: [[TMP15:%.*]] = call float @llvm.fmuladd.f32(float [[TMP0]], float [[TMP5]], float 0.000000e+00)
; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x float> [[TMP14]], zeroinitializer
; CHECK-NEXT: [[MUL6_I_I_I:%.*]] = fmul float [[TMP15]], 0.000000e+00
; CHECK-NEXT: store <2 x float> [[TMP16]], ptr [[NOR1]], align 4
; CHECK-NEXT: store float [[MUL6_I_I_I]], ptr [[ARRAYIDX2_I265]], align 4
; CHECK-NEXT: ret i32 0
;
entry:
  %nor1 = alloca [0 x [3 x float]], i32 0, align 4
  %arrayidx.i = getelementptr float, ptr %nor1, i64 1
  %1 = load float, ptr %arrayidx.i, align 4
  %arrayidx2.i265 = getelementptr float, ptr %nor1, i64 2
  %2 = load float, ptr %arrayidx2.i265, align 4
  %3 = fneg float %2
  %neg.i267 = fmul float %3, %0
  %4 = call float @llvm.fmuladd.f32(float %1, float 0.000000e+00, float %neg.i267)
  %5 = load float, ptr %nor1, align 4
  %6 = fneg float %5
  %neg11.i = fmul float %6, %0
  %7 = call float @llvm.fmuladd.f32(float %2, float 0.000000e+00, float %neg11.i)
  %8 = fneg float %1
  %neg18.i = fmul float %8, %0
  %9 = call float @llvm.fmuladd.f32(float %5, float 0.000000e+00, float %neg18.i)
  %10 = call float @llvm.fmuladd.f32(float %0, float %9, float 0.000000e+00)
  %11 = call float @llvm.fmuladd.f32(float %0, float %4, float 0.000000e+00)
  %12 = call float @llvm.fmuladd.f32(float %0, float %7, float 0.000000e+00)
  %mul.i.i.i = fmul float %10, 0.000000e+00
  %mul3.i.i.i = fmul float %11, 0.000000e+00
  %mul6.i.i.i = fmul float %12, 0.000000e+00
  store float %mul.i.i.i, ptr %nor1, align 4
  store float %mul3.i.i.i, ptr %arrayidx.i, align 4
  store float %mul6.i.i.i, ptr %arrayidx2.i265, align 4
  ret i32 0
}

; Extracts of a constant-zero <3 x float> fed through fmuladd and stored to
; 3 consecutive floats; checks expect a <2 x float> fmuladd plus one scalar
; lane (the i64-2 slot).
define void @reorder_indices_2(ptr %spoint) {
; CHECK-LABEL: define void @reorder_indices_2(
; CHECK-SAME: ptr [[SPOINT:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x float> zeroinitializer, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float 0.000000e+00, float 0.000000e+00)
; CHECK-NEXT: [[MUL4_I461:%.*]] = fmul float [[TMP1]], 0.000000e+00
; CHECK-NEXT: [[DSCO:%.*]] = getelementptr float, ptr [[SPOINT]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x float> zeroinitializer)
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], zeroinitializer
; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[DSCO]], align 4
; CHECK-NEXT: [[ARRAYIDX5_I476:%.*]] = getelementptr float, ptr [[SPOINT]], i64 2
; CHECK-NEXT: store float [[MUL4_I461]], ptr [[ARRAYIDX5_I476]], align 4
; CHECK-NEXT: ret void
;
entry:
  %0 = extractelement <3 x float> zeroinitializer, i64 1
  %1 = extractelement <3 x float> zeroinitializer, i64 2
  %2 = extractelement <3 x float> zeroinitializer, i64 0
  %3 = tail call float @llvm.fmuladd.f32(float %0, float 0.000000e+00, float 0.000000e+00)
  %4 = tail call float @llvm.fmuladd.f32(float %1, float 0.000000e+00, float 0.000000e+00)
  %5 = tail call float @llvm.fmuladd.f32(float %2, float 0.000000e+00, float 0.000000e+00)
  %mul.i457 = fmul float %3, 0.000000e+00
  %mul2.i459 = fmul float %4, 0.000000e+00
  %mul4.i461 = fmul float %5, 0.000000e+00
  %dsco = getelementptr float, ptr %spoint, i64 0
  store float %mul.i457, ptr %dsco, align 4
  %arrayidx3.i474 = getelementptr float, ptr %spoint, i64 1
  store float %mul2.i459, ptr %arrayidx3.i474, align 4
  %arrayidx5.i476 = getelementptr float, ptr %spoint, i64 2
  store float %mul4.i461, ptr %arrayidx5.i476, align 4
  ret void
}

; Two i8 loads from adjacent offsets stored to non-consecutive destination
; offsets (11, 10, 12, with one value stored twice); the checks expect this
; to remain fully scalar.
define void @reorder_indices_2x_load(ptr %png_ptr, ptr %info_ptr) {
; CHECK-LABEL: define void @reorder_indices_2x_load(
; CHECK-SAME: ptr [[PNG_PTR:%.*]], ptr [[INFO_PTR:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BIT_DEPTH:%.*]] = getelementptr i8, ptr [[INFO_PTR]], i64 0
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[BIT_DEPTH]], align 4
; CHECK-NEXT: [[COLOR_TYPE:%.*]] = getelementptr i8, ptr [[INFO_PTR]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[COLOR_TYPE]], align 1
; CHECK-NEXT: [[BIT_DEPTH37_I:%.*]] = getelementptr i8, ptr [[PNG_PTR]], i64 11
; CHECK-NEXT: store i8 [[TMP0]], ptr [[BIT_DEPTH37_I]], align 1
; CHECK-NEXT: [[COLOR_TYPE39_I:%.*]] = getelementptr i8, ptr [[PNG_PTR]], i64 10
; CHECK-NEXT: store i8 [[TMP1]], ptr [[COLOR_TYPE39_I]], align 2
; CHECK-NEXT: [[USR_BIT_DEPTH_I:%.*]] = getelementptr i8, ptr [[PNG_PTR]], i64 12
; CHECK-NEXT: store i8 [[TMP0]], ptr [[USR_BIT_DEPTH_I]], align 8
; CHECK-NEXT: ret void
;
entry:
  %bit_depth = getelementptr i8, ptr %info_ptr, i64 0
  %0 = load i8, ptr %bit_depth, align 4
  %color_type = getelementptr i8, ptr %info_ptr, i64 1
  %1 = load i8, ptr %color_type, align 1
  %bit_depth37.i = getelementptr i8, ptr %png_ptr, i64 11
  store i8 %0, ptr %bit_depth37.i, align 1
  %color_type39.i = getelementptr i8, ptr %png_ptr, i64 10
  store i8 %1, ptr %color_type39.i, align 2
  %usr_bit_depth.i = getelementptr i8, ptr %png_ptr, i64 12
  store i8 %0, ptr %usr_bit_depth.i, align 8
  ret void
}

; NOTE(review): 'indidces' typo is kept deliberately -- the function name
; must stay in lockstep with its autogenerated CHECK-LABEL below.
; Three fmul+fadd lanes: the first two vectorize to <2 x float>, the third
; (which reuses %0) stays scalar.
define void @reuse_shuffle_indidces_1(ptr %col, float %0, float %1) {
; CHECK-LABEL: define void @reuse_shuffle_indidces_1(
; CHECK-SAME: ptr [[COL:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP0]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[TMP4]], zeroinitializer
; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[COL]], align 4
; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr float, ptr [[COL]], i64 2
; CHECK-NEXT: [[MUL38:%.*]] = fmul float [[TMP0]], 0.000000e+00
; CHECK-NEXT: [[TMP6:%.*]] = fadd float [[MUL38]], 0.000000e+00
; CHECK-NEXT: store float [[TMP6]], ptr [[ARRAYIDX33]], align 4
; CHECK-NEXT: ret void
;
entry:
  %mul24 = fmul float %1, 0.000000e+00
  %2 = fadd float %mul24, 0.000000e+00
  store float %2, ptr %col, align 4
  %arrayidx26 = getelementptr float, ptr %col, i64 1
  %mul31 = fmul float %0, 0.000000e+00
  %3 = fadd float %mul31, 0.000000e+00
  store float %3, ptr %arrayidx26, align 4
  %arrayidx33 = getelementptr float, ptr %col, i64 2
  %mul38 = fmul float %0, 0.000000e+00
  %4 = fadd float %mul38, 0.000000e+00
  store float %4, ptr %arrayidx33, align 4
  ret void
}

; Pre-vectorized <2 x double>/<4 x float> shuffle chain narrowed to a single
; <3 x float> store; the checks expect the IR to pass through unchanged.
; CHECK lines are update_test_checks.py output -- do not edit by hand.
define void @reuse_shuffle_indices_2(ptr %inertia, double %0) {
; CHECK-LABEL: define void @reuse_shuffle_indices_2(
; CHECK-SAME: ptr [[INERTIA:%.*]], double [[TMP0:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float>
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 poison>
; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[TMP6]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: store <3 x float> [[TMP9]], ptr [[INERTIA]], align 4
; CHECK-NEXT: ret void
;
entry:
  %1 = insertelement <2 x double> poison, double %0, i32 0
  %2 = shufflevector <2 x double> %1, <2 x double> poison, <2 x i32> zeroinitializer
  %3 = fptrunc <2 x double> %2 to <2 x float>
  %4 = fmul <2 x float> %3, zeroinitializer
  %5 = shufflevector <2 x float> %4, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 poison>
  %6 = fadd <4 x float> %5, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
  %7 = fmul <4 x float> %6, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
  %8 = fadd <4 x float> %7, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
  %9 = shufflevector <4 x float> %8, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
  store <3 x float> %9, ptr %inertia, align 4
  ret void
}

; Presumably a cost-model crash reproducer (per the name); three tail-call
; fmuladds sharing operand %fneg, stored to 3 consecutive floats. Checks
; expect the IR to remain scalar.
define void @reuse_shuffle_indices_cost_crash_2(ptr %bezt, float %0) {
; CHECK-LABEL: define void @reuse_shuffle_indices_cost_crash_2(
; CHECK-SAME: ptr [[BEZT:%.*]], float [[TMP0:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[FNEG:%.*]] = fmul float [[TMP0]], 0.000000e+00
; CHECK-NEXT: [[TMP1:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float [[FNEG]], float 0.000000e+00)
; CHECK-NEXT: store float [[TMP1]], ptr [[BEZT]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float [[FNEG]], float 0.000000e+00)
; CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr float, ptr [[BEZT]], i64 1
; CHECK-NEXT: store float [[TMP2]], ptr [[ARRAYIDX5_I]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = tail call float @llvm.fmuladd.f32(float [[FNEG]], float 0.000000e+00, float 0.000000e+00)
; CHECK-NEXT: [[ARRAYIDX8_I831:%.*]] = getelementptr float, ptr [[BEZT]], i64 2
; CHECK-NEXT: store float [[TMP3]], ptr [[ARRAYIDX8_I831]], align 4
; CHECK-NEXT: ret void
;
entry:
  %fneg = fmul float %0, 0.000000e+00
  %1 = tail call float @llvm.fmuladd.f32(float %0, float %fneg, float 0.000000e+00)
  store float %1, ptr %bezt, align 4
  %2 = tail call float @llvm.fmuladd.f32(float %0, float %fneg, float 0.000000e+00)
  %arrayidx5.i = getelementptr float, ptr %bezt, i64 1
  store float %2, ptr %arrayidx5.i, align 4
  %3 = tail call float @llvm.fmuladd.f32(float %fneg, float 0.000000e+00, float 0.000000e+00)
  %arrayidx8.i831 = getelementptr float, ptr %bezt, i64 2
  store float %3, ptr %arrayidx8.i831, align 4
  ret void
}

; fsub/fadd/fsub + fptrunc lanes stored to 3 consecutive floats of a
; [4 x float]; checks expect everything to stay scalar.
define void @reuse_shuffle_indices_cost_crash_3(ptr %m, double %conv, double %conv2) {
; CHECK-LABEL: define void @reuse_shuffle_indices_cost_crash_3(
; CHECK-SAME: ptr [[M:%.*]], double [[CONV:%.*]], double [[CONV2:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SUB19:%.*]] = fsub double 0.000000e+00, [[CONV2]]
; CHECK-NEXT: [[CONV20:%.*]] = fptrunc double [[SUB19]] to float
; CHECK-NEXT: store float [[CONV20]], ptr [[M]], align 4
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 0.000000e+00
; CHECK-NEXT: [[CONV239:%.*]] = fptrunc double [[ADD]] to float
; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 1
; CHECK-NEXT: store float [[CONV239]], ptr [[ARRAYIDX25]], align 4
; CHECK-NEXT: [[ADD26:%.*]] = fsub double [[CONV]], [[CONV]]
; CHECK-NEXT: [[CONV27:%.*]] = fptrunc double [[ADD26]] to float
; CHECK-NEXT: [[ARRAYIDX29:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 2
; CHECK-NEXT: store float [[CONV27]], ptr [[ARRAYIDX29]], align 4
; CHECK-NEXT: ret void
;
entry:
  %sub19 = fsub double 0.000000e+00, %conv2
  %conv20 = fptrunc double %sub19 to float
  store float %conv20, ptr %m, align 4
  %add = fadd double %conv, 0.000000e+00
  %conv239 = fptrunc double %add to float
  %arrayidx25 = getelementptr [4 x float], ptr %m, i64 0, i64 1
  store float %conv239, ptr %arrayidx25, align 4
  %add26 = fsub double %conv, %conv
  %conv27 = fptrunc double %add26 to float
  %arrayidx29 = getelementptr [4 x float], ptr %m, i64 0, i64 2
  store float %conv27, ptr %arrayidx29, align 4
  ret void
}

; Three fadd/fmul/fptrunc lanes over a 3-float alloca, plus a trailing
; volatile load; checks expect two lanes as <2 x double>/<2 x float> vectors
; and the third lane scalar.
define void @reuse_shuffle_indices_cost_crash_4(double %conv7.i) {
; CHECK-LABEL: define void @reuse_shuffle_indices_cost_crash_4(
; CHECK-SAME: double [[CONV7_I:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DATA_I111:%.*]] = alloca [0 x [0 x [0 x [3 x float]]]], i32 0, align 4
; CHECK-NEXT: [[ARRAYIDX_2_I:%.*]] = getelementptr [3 x float], ptr [[DATA_I111]], i64 0, i64 2
; CHECK-NEXT: [[MUL17_I_US:%.*]] = fmul double [[CONV7_I]], 0.000000e+00
; CHECK-NEXT: [[MUL_2_I_I_US:%.*]] = fmul double [[MUL17_I_US]], 0.000000e+00
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV7_I]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[ADD_2_I_I_US:%.*]] = fadd double [[MUL_2_I_I_US]], 0.000000e+00
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float>
; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[DATA_I111]], align 4
; CHECK-NEXT: [[CONV_2_I46_US:%.*]] = fptrunc double [[ADD_2_I_I_US]] to float
; CHECK-NEXT: store float [[CONV_2_I46_US]], ptr [[ARRAYIDX_2_I]], align 4
; CHECK-NEXT: [[CALL2_I_US:%.*]] = load volatile ptr, ptr [[DATA_I111]], align 8
; CHECK-NEXT: ret void
;
entry:
  %data.i111 = alloca [0 x [0 x [0 x [3 x float]]]], i32 0, align 4
  %arrayidx.1.i = getelementptr [3 x float], ptr %data.i111, i64 0, i64 1
  %arrayidx.2.i = getelementptr [3 x float], ptr %data.i111, i64 0, i64 2
  %mul17.i.us = fmul double %conv7.i, 0.000000e+00
  %mul.2.i.i.us = fmul double %mul17.i.us, 0.000000e+00
  %add.i.i82.i.us = fadd double %conv7.i, 0.000000e+00
  %add.1.i.i84.i.us = fadd double %conv7.i, 0.000000e+00
  %mul.i.i91.i.us = fmul double %add.i.i82.i.us, %conv7.i
  %mul.1.i.i92.i.us = fmul double %add.1.i.i84.i.us, %conv7.i
  %add.i96.i.us = fadd double %mul.i.i91.i.us, 0.000000e+00
  %add.1.i.i.us = fadd double %mul.1.i.i92.i.us, 0.000000e+00
  %add.2.i.i.us = fadd double %mul.2.i.i.us, 0.000000e+00
  %conv.i42.us = fptrunc double %add.i96.i.us to float
  store float %conv.i42.us, ptr %data.i111, align 4
  %conv.1.i44.us = fptrunc double %add.1.i.i.us to float
  store float %conv.1.i44.us, ptr %arrayidx.1.i, align 4
  %conv.2.i46.us = fptrunc double %add.2.i.i.us to float
  store float %conv.2.i46.us, ptr %arrayidx.2.i, align 4
  %call2.i.us = load volatile ptr, ptr %data.i111, align 8
  ret void
}

; Structurally the same store pattern as @reuse_shuffle_indices_cost_crash_3
; but with a different operand mix per lane; checks expect it to stay scalar.
define void @common_mask(ptr %m, double %conv, double %conv2) {
; CHECK-LABEL: define void @common_mask(
; CHECK-SAME: ptr [[M:%.*]], double [[CONV:%.*]], double [[CONV2:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SUB19:%.*]] = fsub double [[CONV]], [[CONV]]
; CHECK-NEXT: [[CONV20:%.*]] = fptrunc double [[SUB19]] to float
; CHECK-NEXT: store float [[CONV20]], ptr [[M]], align 4
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[CONV2]], 0.000000e+00
; CHECK-NEXT: [[CONV239:%.*]] = fptrunc double [[ADD]] to float
; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 1
; CHECK-NEXT: store float [[CONV239]], ptr [[ARRAYIDX25]], align 4
; CHECK-NEXT: [[ADD26:%.*]] = fsub double 0.000000e+00, [[CONV]]
; CHECK-NEXT: [[CONV27:%.*]] = fptrunc double [[ADD26]] to float
; CHECK-NEXT: [[ARRAYIDX29:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 2
; CHECK-NEXT: store float [[CONV27]], ptr [[ARRAYIDX29]], align 4
; CHECK-NEXT: ret void
;
entry:
  %sub19 = fsub double %conv, %conv
  %conv20 = fptrunc double %sub19 to float
  store float %conv20, ptr %m, align 4
  %add = fadd double %conv2, 0.000000e+00
  %conv239 = fptrunc double %add to float
  %arrayidx25 = getelementptr [4 x float], ptr %m, i64 0, i64 1
  store float %conv239, ptr %arrayidx25, align 4
  %add26 = fsub double 0.000000e+00, %conv
  %conv27 = fptrunc double %add26 to float
  %arrayidx29 = getelementptr [4 x float], ptr %m, i64 0, i64 2
  store float %conv27, ptr %arrayidx29, align 4
  ret void
}

; Three extracts from a <3 x i16> stored to 3 consecutive i16 slots in
; reversed order (2, 1, 0); checks expect the extract/store pairs to remain
; scalar.
define void @vec3_extract(<3 x i16> %pixel.sroa.0.4.vec.insert606, ptr %call3.i536) {
; CHECK-LABEL: define void @vec3_extract(
; CHECK-SAME: <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606:%.*]], ptr [[CALL3_I536:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PIXEL_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606]], i64 2
; CHECK-NEXT: [[RED668:%.*]] = getelementptr i16, ptr [[CALL3_I536]], i64 2
; CHECK-NEXT: store i16 [[PIXEL_SROA_0_4_VEC_EXTRACT]], ptr [[RED668]], align 2
; CHECK-NEXT: [[PIXEL_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606]], i64 1
; CHECK-NEXT: [[GREEN670:%.*]] = getelementptr i16, ptr [[CALL3_I536]], i64 1
; CHECK-NEXT: store i16 [[PIXEL_SROA_0_2_VEC_EXTRACT]], ptr [[GREEN670]], align 2
; CHECK-NEXT: [[PIXEL_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606]], i64 0
; CHECK-NEXT: store i16 [[PIXEL_SROA_0_0_VEC_EXTRACT]], ptr [[CALL3_I536]], align 2
; CHECK-NEXT: ret void
;
entry:
  %pixel.sroa.0.4.vec.extract = extractelement <3 x i16> %pixel.sroa.0.4.vec.insert606, i64 2
  %red668 = getelementptr i16, ptr %call3.i536, i64 2
  store i16 %pixel.sroa.0.4.vec.extract, ptr %red668, align 2
  %pixel.sroa.0.2.vec.extract = extractelement <3 x i16> %pixel.sroa.0.4.vec.insert606, i64 1
  %green670 = getelementptr i16, ptr %call3.i536, i64 1
  store i16 %pixel.sroa.0.2.vec.extract, ptr %green670, align 2
  %pixel.sroa.0.0.vec.extract = extractelement <3 x i16> %pixel.sroa.0.4.vec.insert606, i64 0
  store i16 %pixel.sroa.0.0.vec.extract, ptr %call3.i536, align 2
  ret void
}

declare float @llvm.fmuladd.f32(float, float, float)