; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-F16
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s -mattr=+bf16 | FileCheck %s --check-prefixes=CHECK,CHECK-BF16

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

define void @strict_fp_reductions() {
; CHECK-LABEL: 'strict_fp_reductions'
; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:22 Lat:44 SizeLat:28 for: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:10 Lat:20 SizeLat:12 for: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:8 Lat:16 SizeLat:8 for: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
  %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
  %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
  %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
  %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
  %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
  %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}

define void @strict_fp_reductions_fp16() {
; CHECK-NOFP16-LABEL: 'strict_fp_reductions_fp16'
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:23 Lat:46 SizeLat:30 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:46 Lat:92 SizeLat:60 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-F16-LABEL: 'strict_fp_reductions_fp16'
; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:23 Lat:46 SizeLat:30 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:60 CodeSize:46 Lat:92 SizeLat:60 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-BF16-LABEL: 'strict_fp_reductions_fp16'
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:23 Lat:46 SizeLat:30 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:46 Lat:92 SizeLat:60 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
  %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
  %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
  %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
  %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
  ret void
}

define void @strict_fp_reductions_bf16() {
; CHECK-NOFP16-LABEL: 'strict_fp_reductions_bf16'
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-F16-LABEL: 'strict_fp_reductions_bf16'
; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-BF16-LABEL: 'strict_fp_reductions_bf16'
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
  %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4f8(bfloat 0.0, <4 x bfloat> undef)
  ret void
}

define void @fast_fp_reductions() {
; CHECK-LABEL: 'fast_fp_reductions'
; CHECK-NEXT:  Cost Model: Found costs of 1 for: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-NEXT:  Cost Model: Found costs of 1 for: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-NEXT:  Cost Model: Found costs of 3 for: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-NEXT:  Cost Model: Found costs of 3 for: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-NEXT:  Cost Model: Found costs of 5 for: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef)
; CHECK-NEXT:  Cost Model: Found costs of 3 for: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef)
; CHECK-NEXT:  Cost Model: Found costs of 1 for: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-NEXT:  Cost Model: Found costs of 1 for: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-NEXT:  Cost Model: Found costs of 4 for: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
; CHECK-NEXT:  Cost Model: Found costs of 8 for: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:2 Lat:6 SizeLat:2 for: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
  %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
  %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)

  %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
  %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)

  %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
  %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)

  %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.0, <13 x float> undef)
  %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.0, <5 x float> undef)

  %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
  %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)

  %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
  %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)

  %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.0, <7 x double> undef)
  %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.0, <9 x double> undef)

  %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)

  ret void
}

define void @fast_fp_reductions_fp16() {
; CHECK-NOFP16-LABEL: 'fast_fp_reductions_fp16'
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:203 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:248 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-F16-LABEL: 'fast_fp_reductions_fp16'
; CHECK-F16-NEXT:  Cost Model: Found costs of 2 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of 2 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of 2 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of 2 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of 3 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of 3 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of 4 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of 4 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of 4 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of 4 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-BF16-LABEL: 'fast_fp_reductions_fp16'
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:203 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:248 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
  %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
  %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)

  %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
  %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)

  %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
  %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)

  %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
  %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)

  %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0.0, <11 x half> undef)
  %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0.0, <13 x half> undef)

  ret void
}

define void @fast_fp_reductions_bf16() {
; CHECK-NOFP16-LABEL: 'fast_fp_reductions_bf16'
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-F16-LABEL: 'fast_fp_reductions_bf16'
; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-BF16-LABEL: 'fast_fp_reductions_bf16'
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
  %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4f8(bfloat -0.0, <4 x bfloat> undef)
  ret void
}

declare bfloat @llvm.vector.reduce.fadd.v4f8(bfloat, <4 x bfloat>)
declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>)

declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>)
declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
declare half @llvm.vector.reduce.fadd.v11f16(half, <11 x half>)
declare half @llvm.vector.reduce.fadd.v13f16(half, <13 x half>)

declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
declare float @llvm.vector.reduce.fadd.v13f32(float, <13 x float>)
declare float @llvm.vector.reduce.fadd.v5f32(float, <5 x float>)

declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
declare double @llvm.vector.reduce.fadd.v7f64(double, <7 x double>)
declare double @llvm.vector.reduce.fadd.v9f64(double, <9 x double>)