; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=all -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=all -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=all -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=all -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=all -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=all -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=all -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=all -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX,AVX512 ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=all -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=all -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=all -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; vXf64 ; define <2 x double> @broadcast_load_v2f64_v2f64(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v2f64_v2f64' ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 ; SSE2-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v2f64_v2f64' ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 ; SSSE3-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst ; ; SSE42-LABEL: 'broadcast_load_v2f64_v2f64' ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 ; SSE42-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst ; ; AVX-LABEL: 'broadcast_load_v2f64_v2f64' ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 ; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst ; %load = load <2 x double>, ptr %src %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer ret <2 x double> %bcst } define <4 x double> @broadcast_load_v2f64_v4f64(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v2f64_v4f64' ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 ; SSE2-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v2f64_v4f64' ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 ; SSSE3-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; ; SSE42-LABEL: 'broadcast_load_v2f64_v4f64' ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 ; SSE42-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; ; AVX-LABEL: 'broadcast_load_v2f64_v4f64' ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 ; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; %load = load <2 x double>, ptr %src %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer ret <4 x double> %bcst } define <8 x double> @broadcast_load_v2f64_v8f64(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v2f64_v8f64' ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 ; SSE2-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v2f64_v8f64' ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 ; SSSE3-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; ; SSE42-LABEL: 'broadcast_load_v2f64_v8f64' ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 ; SSE42-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; ; AVX-LABEL: 'broadcast_load_v2f64_v8f64' ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 ; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; %load = load <2 x double>, ptr %src %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer ret <8 x double> %bcst } define <2 x double> @broadcast_load_f64_v2f64(ptr %src) { ; SSE-LABEL: 'broadcast_load_f64_v2f64' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 ; SSE-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <2 x double> poison, double %load, i32 0 ; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst ; ; AVX-LABEL: 'broadcast_load_f64_v2f64' ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 ; AVX-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <2 x double> poison, double %load, i32 0 ; AVX-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst ; %load = load double, ptr %src %insert = insertelement <2 x double> poison, double %load, i32 0 %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer ret <2 x double> %bcst } define <4 x double> @broadcast_load_f64_v4f64(ptr %src) { ; SSE-LABEL: 'broadcast_load_f64_v4f64' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 ; SSE-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0 ; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; ; AVX1-LABEL: 'broadcast_load_f64_v4f64' ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 ; AVX1-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0 ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; ; AVX2-LABEL: 'broadcast_load_f64_v4f64' ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 ; AVX2-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0 ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; ; AVX512-LABEL: 'broadcast_load_f64_v4f64' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 ; AVX512-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0 ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; %load = load double, ptr %src %insert = insertelement <4 x double> poison, double %load, i32 0 %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer ret <4 x double> %bcst } define <8 x double> @broadcast_load_f64_v8f64(ptr %src) { ; SSE-LABEL: 'broadcast_load_f64_v8f64' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 ; SSE-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0 ; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; ; AVX1-LABEL: 'broadcast_load_f64_v8f64' ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 ; AVX1-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0 ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; ; AVX2-LABEL: 'broadcast_load_f64_v8f64' ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 ; AVX2-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0 ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; ; AVX512-LABEL: 'broadcast_load_f64_v8f64' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 ; AVX512-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0 ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; %load = load double, ptr %src %insert = insertelement <8 x double> poison, double %load, i32 0 %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer ret <8 x double> %bcst } ; ; vXf32 ; define <4 x float> @broadcast_load_v4f32_v4f32(ptr %src) { ; SSE-LABEL: 'broadcast_load_v4f32_v4f32' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16 ; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %bcst ; ; AVX-LABEL: 'broadcast_load_v4f32_v4f32' ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16 ; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %bcst ; %load = load <4 x float>, ptr %src %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer ret <4 x float> %bcst } define <8 x float> @broadcast_load_v4f32_v8f32(ptr %src) { ; SSE-LABEL: 'broadcast_load_v4f32_v8f32' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16 ; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x float> %bcst ; ; AVX-LABEL: 'broadcast_load_v4f32_v8f32' ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16 ; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x float> %bcst ; %load = load <4 x float>, ptr %src %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer ret <8 x float> %bcst } define <16 x float> @broadcast_load_v4f32_v16f32(ptr %src) { ; SSE-LABEL: 'broadcast_load_v4f32_v16f32' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16 ; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %bcst ; ; AVX-LABEL: 'broadcast_load_v4f32_v16f32' ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16 ; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %bcst ; %load = load <4 x float>, ptr %src %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer ret <16 x float> %bcst } define <2 x i64> @broadcast_load_v2i64_v2i64(ptr %src) { ; SSE-LABEL: 'broadcast_load_v2i64_v2i64' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16 ; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %bcst ; ; AVX-LABEL: 'broadcast_load_v2i64_v2i64' ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16 ; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %bcst ; %load = load <2 x i64>, ptr %src %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer ret <2 x i64> %bcst } ; ; vXi64 ; define <4 x i64> @broadcast_load_v2i64_v4i64(ptr %src) { ; SSE-LABEL: 'broadcast_load_v2i64_v4i64' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16 ; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %bcst ; ; AVX-LABEL: 'broadcast_load_v2i64_v4i64' ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16 ; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %bcst ; %load = load <2 x i64>, ptr %src %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer ret <4 x i64> %bcst } define <8 x i64> @broadcast_load_v2i64_v8i64(ptr %src) { ; SSE-LABEL: 'broadcast_load_v2i64_v8i64' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16 ; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %bcst ; ; AVX-LABEL: 'broadcast_load_v2i64_v8i64' ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16 ; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %bcst ; %load = load <2 x i64>, ptr %src %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer ret <8 x i64> %bcst } ; ; vXi32 ; define <4 x i32> @broadcast_load_v4i32_v4i32(ptr %src) { ; SSE-LABEL: 'broadcast_load_v4i32_v4i32' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16 ; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %bcst ; ; AVX-LABEL: 'broadcast_load_v4i32_v4i32' ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16 ; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %bcst ; %load = load <4 x i32>, ptr %src %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer ret <4 x i32> %bcst } define <8 x i32> @broadcast_load_v4i32_v8i32(ptr %src) { ; SSE-LABEL: 'broadcast_load_v4i32_v8i32' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16 ; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %bcst ; ; AVX-LABEL: 'broadcast_load_v4i32_v8i32' ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16 ; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %bcst ; %load = load <4 x i32>, ptr %src %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer ret <8 x i32> %bcst } define <16 x i32> @broadcast_load_v4i32_v16i32(ptr %src) { ; SSE-LABEL: 'broadcast_load_v4i32_v16i32' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16 ; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %bcst ; ; AVX-LABEL: 'broadcast_load_v4i32_v16i32' ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16 ; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %bcst ; %load = load <4 x i32>, ptr %src %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer ret <16 x i32> %bcst } ; ; vXi16 ; define <8 x i16> @broadcast_load_v8i16_v8i16(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v8i16_v8i16' ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v8i16_v8i16' ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst ; ; SSE42-LABEL: 'broadcast_load_v8i16_v8i16' ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst ; ; AVX1-LABEL: 'broadcast_load_v8i16_v8i16' ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst ; ; AVX2-LABEL: 'broadcast_load_v8i16_v8i16' ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst ; ; AVX512-LABEL: 'broadcast_load_v8i16_v8i16' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst ; %load = load <8 x i16>, ptr %src %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer ret <8 x i16> %bcst } define <16 x i16> @broadcast_load_v8i16_v16i16(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v8i16_v16i16' ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v8i16_v16i16' ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst ; ; SSE42-LABEL: 'broadcast_load_v8i16_v16i16' ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst ; ; AVX1-LABEL: 'broadcast_load_v8i16_v16i16' ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst ; ; AVX2-LABEL: 'broadcast_load_v8i16_v16i16' ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst ; ; AVX512-LABEL: 'broadcast_load_v8i16_v16i16' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst ; %load = load <8 x i16>, ptr %src %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer ret <16 x i16> %bcst } define <32 x i16> @broadcast_load_v8i16_v32i16(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v8i16_v32i16' ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v8i16_v32i16' ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst ; ; SSE42-LABEL: 'broadcast_load_v8i16_v32i16' ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst ; ; AVX1-LABEL: 'broadcast_load_v8i16_v32i16' ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst ; ; AVX2-LABEL: 'broadcast_load_v8i16_v32i16' ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst ; ; AVX512-LABEL: 'broadcast_load_v8i16_v32i16' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 ; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst ; %load = load <8 x i16>, ptr %src %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer ret <32 x i16> %bcst } ; ; vXi8 ; define <16 x i8> @broadcast_load_v16i8_v16i8(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v16i8_v16i8' ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v16i8_v16i8' ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst ; ; SSE42-LABEL: 'broadcast_load_v16i8_v16i8' ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst ; ; AVX1-LABEL: 'broadcast_load_v16i8_v16i8' ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst ; ; AVX2-LABEL: 'broadcast_load_v16i8_v16i8' ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst ; ; AVX512-LABEL: 'broadcast_load_v16i8_v16i8' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst ; %load = load <16 x i8>, ptr %src %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer ret <16 x i8> %bcst } define <32 x i8> @broadcast_load_v16i8_v32i8(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v16i8_v32i8' ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v16i8_v32i8' ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst ; ; SSE42-LABEL: 'broadcast_load_v16i8_v32i8' ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst ; ; AVX1-LABEL: 'broadcast_load_v16i8_v32i8' ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst ; ; AVX2-LABEL: 'broadcast_load_v16i8_v32i8' ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst ; ; AVX512-LABEL: 'broadcast_load_v16i8_v32i8' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst ; %load = load <16 x i8>, ptr %src %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer ret <32 x i8> %bcst } define <64 x i8> @broadcast_load_v16i8_v64i8(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v16i8_v64i8' ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v16i8_v64i8' ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst ; ; SSE42-LABEL: 'broadcast_load_v16i8_v64i8' ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst ; ; AVX1-LABEL: 'broadcast_load_v16i8_v64i8' ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst ; ; AVX2-LABEL: 'broadcast_load_v16i8_v64i8' ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst ; ; AVX512-LABEL: 'broadcast_load_v16i8_v64i8' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 ; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst ; %load = load <16 x i8>, ptr %src %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer ret <64 x i8> %bcst }