; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8
; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEFP
; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

define void @fadd_strict() {
; CHECK-V8-LABEL: 'fadd_strict'
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:12 Lat:20 SizeLat:12 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:24 Lat:40 SizeLat:24 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:48 Lat:80 SizeLat:48 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:12 SizeLat:4 for: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:24 SizeLat:8 for: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:12 SizeLat:4 for: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:20 Lat:28 SizeLat:20 for: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-MVEFP-LABEL: 'fadd_strict'
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 3 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 6 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 12 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 24 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 2 for: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 4 for: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 8 for: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 4 for: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 8 for: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 32 for: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-MVEI-LABEL: 'fadd_strict'
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 8 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 16 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 32 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 8 for: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 16 for: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 8 for: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 32 for: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
  %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
  %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
  %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
  %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
  %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
  %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
  %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
  %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
  %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
  %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}


define void @fadd_unordered() {
; CHECK-V8-LABEL: 'fadd_unordered'
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:7 Lat:9 SizeLat:7 for: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:20 SizeLat:16 for: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:33 Lat:39 SizeLat:33 for: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:92 CodeSize:66 Lat:74 SizeLat:66 for: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:9 SizeLat:3 for: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:12 SizeLat:4 for: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:3 Lat:9 SizeLat:3 for: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:52 CodeSize:30 Lat:34 SizeLat:30 for: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-MVEFP-LABEL: 'fadd_unordered'
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 3 for: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 6 for: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:8 SizeLat:8 for: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 2 for: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 4 for: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:6 SizeLat:6 for: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:40 Lat:76 SizeLat:76 for: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-MVEI-LABEL: 'fadd_unordered'
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:57 CodeSize:41 Lat:57 SizeLat:57 for: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:169 CodeSize:121 Lat:169 SizeLat:169 for: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:225 CodeSize:161 Lat:225 SizeLat:225 for: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:57 CodeSize:41 Lat:57 SizeLat:57 for: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:85 CodeSize:61 Lat:85 SizeLat:85 for: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:40 Lat:76 SizeLat:76 for: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
  %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
  %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
  %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
  %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
  %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
  %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
  %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
  %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
  %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
  %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}

define void @fmul_strict() {
; CHECK-V8-LABEL: 'fmul_strict'
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:10 SizeLat:6 for: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:12 Lat:20 SizeLat:12 for: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:24 Lat:40 SizeLat:24 for: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:48 Lat:80 SizeLat:48 for: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:12 SizeLat:4 for: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:24 SizeLat:8 for: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:12 SizeLat:4 for: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:20 Lat:28 SizeLat:20 for: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-MVEFP-LABEL: 'fmul_strict'
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 3 for: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 6 for: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 12 for: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 24 for: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 2 for: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 4 for: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 8 for: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 4 for: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 8 for: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 32 for: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-MVEI-LABEL: 'fmul_strict'
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 8 for: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 16 for: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 32 for: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 8 for: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 16 for: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 8 for: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of 32 for: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
  %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
  %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
  %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
  %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
  %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
  %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
  %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
  %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
  %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
  %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}


define void @fmul_unordered() {
; CHECK-V8-LABEL: 'fmul_unordered'
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:7 Lat:9 SizeLat:7 for: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:20 SizeLat:16 for: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:33 Lat:39 SizeLat:33 for: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:92 CodeSize:66 Lat:74 SizeLat:66 for: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:9 SizeLat:3 for: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:12 SizeLat:4 for: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:3 Lat:9 SizeLat:3 for: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:52 CodeSize:30 Lat:34 SizeLat:30 for: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-MVEFP-LABEL: 'fmul_unordered'
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 3 for: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 6 for: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:8 SizeLat:8 for: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 2 for: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 4 for: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:6 SizeLat:6 for: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:40 Lat:76 SizeLat:76 for: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-MVEI-LABEL: 'fmul_unordered'
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:57 CodeSize:41 Lat:57 SizeLat:57 for: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:169 CodeSize:121 Lat:169 SizeLat:169 for: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:225 CodeSize:161 Lat:225 SizeLat:225 for: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:57 CodeSize:41 Lat:57 SizeLat:57 for: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:85 CodeSize:61 Lat:85 SizeLat:85 for: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:40 Lat:76 SizeLat:76 for: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
  %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
  %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
  %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
  %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
  %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
  %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
  %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
  %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
  %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
  %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}