; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes='print<cost-model>' -disable-output -mtriple=s390x-unknown-linux \
; RUN:   -mcpu=z15 < %s 2>&1 | FileCheck %s --check-prefix=Z15

; Cost-model test for the llvm.vector.reduce.* intrinsics on s390x with
; -mcpu=z15. Every check line in this file uses the single Z15 prefix that is
; passed to FileCheck above; a directive written with any other prefix, or
; without the trailing colon, is silently ignored by FileCheck.

; Floating-point add reductions without fast-math flags.
define void @fadd_reductions() {
; Z15-LABEL: 'fadd_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
  %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
  %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
  %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
  %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}

; Floating-point add reductions carrying the 'fast' flag.
define void @fast_fadd_reductions(ptr %src, ptr %dst) {
; Z15-LABEL: 'fast_fadd_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
  %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
  %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
  %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
  %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}

; Floating-point multiply reductions without fast-math flags.
define void @fmul_reductions() {
; Z15-LABEL: 'fmul_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
  %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
  %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
  %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
  %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}

; Floating-point multiply reductions carrying the 'fast' flag.
define void @fast_fmul_reductions() {
; Z15-LABEL: 'fast_fmul_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
  %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
  %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
  %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
  %fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}

; Floating-point minimum reductions.
define void @fmin_reductions() {
; Z15-LABEL: 'fmin_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
  %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
  %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
  %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
  %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
  ret void
}

; Floating-point maximum reductions.
define void @fmax_reductions() {
; Z15-LABEL: 'fmax_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
  %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
  %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
  %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
  %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
  ret void
}

; Unsigned integer minimum reductions.
define void @reduceumin() {
; Z15-LABEL: 'reduceumin'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
  %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef)
  ret void
}

; Unsigned integer maximum reductions.
define void @reduceumax() {
; Z15-LABEL: 'reduceumax'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
  %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef)
  ret void
}

; Signed integer minimum reductions.
define void @reducesmin() {
; Z15-LABEL: 'reducesmin'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
  %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef)
  ret void
}

; Signed integer maximum reductions.
define void @reducesmax() {
; Z15-LABEL: 'reducesmax'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
  %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef)
  ret void
}

; Integer add reductions over i64, i32, i16 and i8 elements, plus two
; oversized cases (<128 x i8> and <4 x i256>).
define void @reduceadd() {
; Z15-LABEL: 'reduceadd'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
;
; Z15-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef)
;
  ; REDUCEADD64
  %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
  %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
  %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
  ; REDUCEADD32
  %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
  %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
  %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
  ; REDUCEADD16
  %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
  %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
  %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
  %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
  ; REDUCEADD8
  %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
  %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
  %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
  %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
  ; EXTREME VALUES
  %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
  %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef)
  ret void
}

; Integer multiply reductions over i64, i32, i16 and i8 elements, plus two
; oversized cases (<128 x i8> and <4 x i256>).
; NOTE(review): these checks previously used a prefix that FileCheck was never
; told about, so they have not been exercised before. If one fails, regenerate
; the expectations with utils/update_analyze_test_checks.py.
define void @reducemul() {
; Z15-LABEL: 'reducemul'
; Z15:       Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
; Z15:       Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
; Z15:       Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
; Z15:       Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
; Z15:       Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
; Z15:       Cost Model: Found an estimated cost of 4 for instruction: %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
; Z15:       Cost Model: Found an estimated cost of 5 for instruction: %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
; Z15:       Cost Model: Found an estimated cost of 7 for instruction: %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
; Z15:       Cost Model: Found an estimated cost of 2 for instruction: %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
; Z15:       Cost Model: Found an estimated cost of 4 for instruction: %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
; Z15:       Cost Model: Found an estimated cost of 6 for instruction: %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
; Z15:       Cost Model: Found an estimated cost of 7 for instruction: %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
; Z15:       Cost Model: Found an estimated cost of 2 for instruction: %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
; Z15:       Cost Model: Found an estimated cost of 4 for instruction: %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
; Z15:       Cost Model: Found an estimated cost of 6 for instruction: %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
; Z15:       Cost Model: Found an estimated cost of 8 for instruction: %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
;
; Z15:       Cost Model: Found an estimated cost of 15 for instruction: %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
; Z15:       Cost Model: Found an estimated cost of 28 for instruction: %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef)
;
  ; REDUCEMUL64
  %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
  %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
  %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
  ; REDUCEMUL32
  %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
  %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
  %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
  ; REDUCEMUL16
  %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
  %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
  %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
  %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
  ; REDUCEMUL8
  %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
  %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
  %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
  %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
  ; EXTREME VALUES
  %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
  %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef)
  ret void
}

; Intrinsic declarations used by the functions above.
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>)

declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)
declare fp128 @llvm.vector.reduce.fmul.v4f128(fp128, <4 x fp128>)

declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
declare fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128>)

declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
declare fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128>)

declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.umin.v4i128(<4 x i128>)

declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.umax.v4i128(<4 x i128>)

declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.smin.v4i128(<4 x i128>)

declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.smax.v4i128(<4 x i128>)

declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
declare i256 @llvm.vector.reduce.add.v4i256(<4 x i256>)

declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>)
declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>)
declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>)
declare i256 @llvm.vector.reduce.mul.v4i256(<4 x i256>)