; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3 ; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 ; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1 ; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 ; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW ; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ ; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512,AVX512VBMI2 ; ; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM ; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mcpu=bdver2 | FileCheck %s --check-prefixes=XOP ; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 ; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mcpu=tigerlake | FileCheck %s --check-prefixes=AVX512,AVX512GFNI target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" ; ; Variable Funnel Shifts ; define void @var_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512, i64 %c64, <2 x i64> %c128, <4 x i64> %c256, <8 x i64> %c512) { ; SSSE3-LABEL: 'var_funnel_i64' ; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) ; SSSE3-NEXT: Cost Model: Found costs of RThru:18 CodeSize:21 Lat:22 SizeLat:26 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:36 CodeSize:42 Lat:44 SizeLat:52 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:72 CodeSize:84 Lat:88 SizeLat:104 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'var_funnel_i64' ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) ; SSE42-NEXT: Cost Model: Found costs of RThru:14 CodeSize:15 Lat:20 SizeLat:22 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) ; SSE42-NEXT: Cost Model: Found costs of RThru:28 CodeSize:30 Lat:40 SizeLat:44 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) ; SSE42-NEXT: Cost Model: Found costs of RThru:56 CodeSize:60 Lat:80 SizeLat:88 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'var_funnel_i64' ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) ; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:13 Lat:15 SizeLat:19 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) ; AVX1-NEXT: Cost Model: Found costs of RThru:25 CodeSize:35 Lat:23 SizeLat:48 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) ; AVX1-NEXT: Cost Model: Found costs of RThru:50 CodeSize:70 Lat:46 SizeLat:96 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'var_funnel_i64' ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:13 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:14 SizeLat:14 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) ; AVX2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:14 Lat:28 SizeLat:28 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'var_funnel_i64' ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) ; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:7 SizeLat:8 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) ; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'var_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) ; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) ; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) ; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'var_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) ; AVX512DQ-NEXT: Cost Model: Found costs of 7 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:7 SizeLat:8 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) ; AVX512DQ-NEXT: Cost Model: Found costs of 7 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'var_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'var_funnel_i64' ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) ; SLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:15 Lat:25 SizeLat:23 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) ; SLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:30 Lat:50 SizeLat:46 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) ; SLM-NEXT: Cost Model: Found costs of RThru:80 CodeSize:60 Lat:100 SizeLat:92 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'var_funnel_i64' ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) ; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:15 Lat:20 SizeLat:22 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) ; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:30 Lat:40 SizeLat:44 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) ; GLM-NEXT: Cost Model: Found costs of RThru:56 CodeSize:60 Lat:80 SizeLat:88 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'var_funnel_i64' ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) ; XOP-NEXT: Cost Model: Found costs of RThru:9 CodeSize:7 Lat:12 SizeLat:8 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) ; XOP-NEXT: Cost Model: Found costs of RThru:23 CodeSize:23 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) ; XOP-NEXT: Cost Model: Found costs of RThru:46 CodeSize:46 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'var_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64) %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128) %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256) %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512) ret void } define void @var_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512, i32 %c32, <4 x i32> %c128, <8 x i32> %c256, <16 x i32> %c512) { ; SSSE3-LABEL: 'var_funnel_i32' ; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) ; SSSE3-NEXT: Cost Model: Found costs of RThru:35 CodeSize:32 Lat:37 SizeLat:38 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:70 CodeSize:63 Lat:73 SizeLat:75 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:140 CodeSize:125 Lat:145 SizeLat:149 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'var_funnel_i32' ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) ; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:43 SizeLat:35 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) ; SSE42-NEXT: Cost Model: Found costs of RThru:72 CodeSize:47 Lat:85 SizeLat:69 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) ; SSE42-NEXT: Cost Model: Found costs of RThru:144 CodeSize:93 Lat:169 SizeLat:137 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'var_funnel_i32' ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) ; AVX1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:21 Lat:24 SizeLat:28 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) ; AVX1-NEXT: Cost Model: Found costs of RThru:36 CodeSize:51 Lat:34 SizeLat:69 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) ; AVX1-NEXT: Cost Model: Found costs of RThru:72 CodeSize:102 Lat:68 SizeLat:138 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'var_funnel_i32' ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:12 SizeLat:12 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:14 SizeLat:16 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) ; AVX2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:14 Lat:28 SizeLat:32 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'var_funnel_i32' ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) ; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:7 SizeLat:8 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) ; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'var_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) ; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) ; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) ; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'var_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) ; AVX512DQ-NEXT: Cost Model: Found costs of 7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:7 SizeLat:8 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) ; AVX512DQ-NEXT: Cost Model: Found costs of 7 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'var_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'var_funnel_i32' ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) ; SLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:43 SizeLat:35 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) ; SLM-NEXT: Cost Model: Found costs of RThru:72 CodeSize:47 Lat:85 SizeLat:69 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) ; SLM-NEXT: Cost Model: Found costs of RThru:144 CodeSize:93 Lat:169 SizeLat:137 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'var_funnel_i32' ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) ; GLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:43 SizeLat:35 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) ; GLM-NEXT: Cost Model: Found costs of RThru:72 CodeSize:47 Lat:85 SizeLat:69 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) ; GLM-NEXT: Cost Model: Found costs of RThru:144 CodeSize:93 Lat:169 SizeLat:137 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'var_funnel_i32' ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) ; XOP-NEXT: Cost Model: Found costs of RThru:9 CodeSize:7 Lat:12 SizeLat:8 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) ; XOP-NEXT: Cost Model: Found costs of RThru:23 CodeSize:23 Lat:23 SizeLat:30 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) ; XOP-NEXT: Cost Model: Found costs of RThru:46 CodeSize:46 Lat:46 SizeLat:60 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'var_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32) %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128) %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256) %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512) ret void } define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) { ; SSSE3-LABEL: 'var_funnel_i16' ; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; SSSE3-NEXT: Cost Model: Found costs of RThru:46 CodeSize:54 Lat:52 SizeLat:58 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:92 CodeSize:107 Lat:103 SizeLat:115 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:184 CodeSize:213 Lat:205 SizeLat:229 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'var_funnel_i16' ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; SSE42-NEXT: Cost Model: Found costs of RThru:39 CodeSize:39 Lat:46 SizeLat:44 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) ; SSE42-NEXT: Cost Model: Found costs of RThru:78 CodeSize:77 Lat:91 SizeLat:87 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) ; SSE42-NEXT: Cost Model: Found costs of RThru:156 CodeSize:153 Lat:181 SizeLat:173 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'var_funnel_i16' ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; AVX1-NEXT: Cost Model: Found costs of RThru:25 CodeSize:30 Lat:31 SizeLat:39 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) ; AVX1-NEXT: Cost Model: Found costs of RThru:54 CodeSize:70 Lat:55 SizeLat:92 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) ; AVX1-NEXT: Cost Model: Found costs of RThru:108 CodeSize:140 Lat:110 SizeLat:184 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'var_funnel_i16' ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; AVX2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:15 Lat:35 SizeLat:26 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) ; AVX2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:25 Lat:26 SizeLat:38 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) ; AVX2-NEXT: Cost Model: Found costs of RThru:44 CodeSize:50 Lat:52 SizeLat:76 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'var_funnel_i16' ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:15 Lat:34 SizeLat:25 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:21 CodeSize:25 Lat:25 SizeLat:35 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:32 CodeSize:38 Lat:50 SizeLat:46 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'var_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) ; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) ; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'var_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:15 CodeSize:15 Lat:34 SizeLat:25 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:21 CodeSize:25 Lat:25 SizeLat:35 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:32 CodeSize:38 Lat:50 SizeLat:46 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'var_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'var_funnel_i16' ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; SLM-NEXT: Cost Model: Found costs of RThru:41 CodeSize:39 Lat:48 SizeLat:45 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) ; SLM-NEXT: Cost Model: Found costs of RThru:82 CodeSize:77 Lat:95 SizeLat:89 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) ; SLM-NEXT: Cost Model: Found costs of RThru:164 CodeSize:153 Lat:189 SizeLat:177 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'var_funnel_i16' ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; GLM-NEXT: Cost Model: Found costs of RThru:39 CodeSize:39 Lat:46 SizeLat:44 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) ; GLM-NEXT: Cost Model: Found costs of RThru:78 CodeSize:77 Lat:91 SizeLat:87 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) ; GLM-NEXT: Cost Model: Found costs of RThru:156 CodeSize:153 Lat:181 SizeLat:173 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'var_funnel_i16' ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; XOP-NEXT: Cost Model: Found costs of RThru:9 CodeSize:7 Lat:12 SizeLat:8 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) ; XOP-NEXT: Cost Model: Found costs of RThru:23 CodeSize:25 Lat:23 SizeLat:31 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) ; XOP-NEXT: Cost Model: Found costs of RThru:46 CodeSize:50 Lat:46 SizeLat:62 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'var_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ret void } define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { ; SSSE3-LABEL: 'var_funnel_i8' ; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; SSSE3-NEXT: Cost Model: Found costs of RThru:33 CodeSize:60 Lat:55 SizeLat:65 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:66 CodeSize:119 Lat:109 SizeLat:129 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:132 CodeSize:237 Lat:217 SizeLat:257 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'var_funnel_i8' ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; SSE42-NEXT: Cost Model: Found costs of RThru:37 CodeSize:40 Lat:57 SizeLat:52 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) ; SSE42-NEXT: Cost Model: Found costs of RThru:74 CodeSize:79 Lat:113 SizeLat:103 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) ; SSE42-NEXT: Cost Model: Found costs of RThru:148 CodeSize:157 Lat:225 SizeLat:205 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'var_funnel_i8' ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:28 Lat:54 SizeLat:41 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) ; AVX1-NEXT: Cost Model: Found costs of RThru:58 CodeSize:72 Lat:54 SizeLat:102 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) ; AVX1-NEXT: Cost Model: Found costs of RThru:116 CodeSize:144 Lat:108 SizeLat:204 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'var_funnel_i8' ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:28 Lat:54 SizeLat:40 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) ; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:28 Lat:59 SizeLat:56 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) ; AVX2-NEXT: Cost Model: Found costs of RThru:40 CodeSize:56 Lat:118 SizeLat:112 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'var_funnel_i8' ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; AVX512F-NEXT: Cost Model: Found costs of RThru:17 CodeSize:28 Lat:53 SizeLat:39 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:19 CodeSize:28 Lat:58 SizeLat:53 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'var_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:13 Lat:21 SizeLat:15 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:28 Lat:58 SizeLat:39 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:18 CodeSize:33 Lat:51 SizeLat:38 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'var_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:17 CodeSize:28 Lat:53 SizeLat:39 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:19 CodeSize:28 Lat:58 SizeLat:53 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'var_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:13 Lat:21 SizeLat:15 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:28 Lat:58 SizeLat:39 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:33 Lat:51 SizeLat:38 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'var_funnel_i8' ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; SLM-NEXT: Cost Model: Found costs of RThru:39 CodeSize:40 Lat:59 SizeLat:53 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) ; SLM-NEXT: Cost Model: Found costs of RThru:78 CodeSize:79 Lat:117 SizeLat:105 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) ; SLM-NEXT: Cost Model: Found costs of RThru:156 CodeSize:157 Lat:233 SizeLat:209 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'var_funnel_i8' ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; GLM-NEXT: Cost Model: Found costs of RThru:37 CodeSize:40 Lat:57 SizeLat:52 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) ; GLM-NEXT: Cost Model: Found costs of RThru:74 CodeSize:79 Lat:113 SizeLat:103 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) ; GLM-NEXT: Cost Model: Found costs of RThru:148 CodeSize:157 Lat:225 SizeLat:205 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'var_funnel_i8' ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; XOP-NEXT: Cost Model: Found costs of RThru:9 CodeSize:7 Lat:12 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) ; XOP-NEXT: Cost Model: Found costs of RThru:23 CodeSize:25 Lat:23 SizeLat:31 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) ; XOP-NEXT: Cost Model: Found costs of RThru:46 CodeSize:50 Lat:46 SizeLat:62 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'var_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:13 Lat:21 SizeLat:15 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:28 Lat:58 SizeLat:39 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:18 CodeSize:33 Lat:51 SizeLat:38 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ret void } ; ; Uniform Variable Funnel Shifts ; define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512, i64 %c64, <2 x i64> %c128, <4 x i64> %c256, <8 x i64> %c512) { ; SSSE3-LABEL: 'splatvar_funnel_i64' ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:14 CodeSize:13 Lat:14 SizeLat:16 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:28 CodeSize:26 Lat:28 SizeLat:32 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:56 CodeSize:52 Lat:56 SizeLat:64 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i64' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:12 SizeLat:12 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:14 Lat:24 SizeLat:24 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; SSE42-NEXT: Cost Model: Found costs of RThru:40 CodeSize:28 Lat:48 SizeLat:48 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:13 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:11 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:14 SizeLat:14 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:14 Lat:28 SizeLat:28 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i64' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_funnel_i64' ; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:7 Lat:17 SizeLat:13 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:14 Lat:34 SizeLat:26 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:64 CodeSize:28 Lat:68 SizeLat:52 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_funnel_i64' ; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:12 SizeLat:12 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:14 Lat:24 SizeLat:24 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:28 Lat:48 SizeLat:48 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_funnel_i64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ret void } define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512, i32 %c32, <4 x i32> %c128, <8 x i32> %c256, <16 x i32> %c512) { ; SSSE3-LABEL: 'splatvar_funnel_i32' ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:10 SizeLat:11 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:20 CodeSize:17 Lat:19 SizeLat:21 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:40 CodeSize:33 Lat:37 SizeLat:41 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i32' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; SSE42-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i32' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_funnel_i32' ; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_funnel_i32' ; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_funnel_i32' ; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ret void } define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) { ; SSSE3-LABEL: 'splatvar_funnel_i16' ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:10 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:20 CodeSize:17 Lat:19 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:40 CodeSize:33 Lat:37 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i16' ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; SSE42-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i16' ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i16' ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i16' ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_funnel_i16' ; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:7 Lat:12 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:13 Lat:23 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:25 Lat:45 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_funnel_i16' ; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_funnel_i16' ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ret void } define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { ; SSSE3-LABEL: 'splatvar_funnel_i8' ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:18 Lat:29 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:35 Lat:57 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:96 CodeSize:69 Lat:113 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i8' ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; SSE42-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i8' ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:16 Lat:18 SizeLat:22 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:36 Lat:26 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:54 CodeSize:72 Lat:52 SizeLat:94 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i8' ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:20 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:21 SizeLat:27 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:42 SizeLat:54 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i8' ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_funnel_i8' ; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:26 CodeSize:16 Lat:31 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:52 CodeSize:31 Lat:61 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:104 CodeSize:61 Lat:121 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_funnel_i8' ; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_funnel_i8' ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:9 CodeSize:7 Lat:12 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:23 CodeSize:25 Lat:23 SizeLat:31 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:46 CodeSize:50 Lat:46 SizeLat:62 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ret void } ; ; Constant Funnel Shifts ; define void @constant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'constant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; SSSE3-NEXT: Cost Model: Found costs of RThru:17 CodeSize:20 Lat:21 SizeLat:25 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:34 CodeSize:40 Lat:42 SizeLat:50 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:68 CodeSize:80 Lat:84 SizeLat:100 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'constant_funnel_i64' ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:14 Lat:19 SizeLat:21 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:26 CodeSize:28 Lat:38 SizeLat:42 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:52 CodeSize:56 Lat:76 SizeLat:84 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'constant_funnel_i64' ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX1-NEXT: Cost Model: Found costs of RThru:9 CodeSize:12 Lat:14 SizeLat:18 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:22 SizeLat:46 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:68 Lat:44 SizeLat:92 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'constant_funnel_i64' ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:12 SizeLat:8 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:6 Lat:13 SizeLat:12 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:12 Lat:26 SizeLat:24 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'constant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX512F-NEXT: Cost Model: Found costs of 6 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:6 SizeLat:7 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) ; AVX512F-NEXT: Cost Model: Found costs of 6 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'constant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'constant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX512DQ-NEXT: Cost Model: Found costs of 6 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:6 SizeLat:7 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) ; AVX512DQ-NEXT: Cost Model: Found costs of 6 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'constant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'constant_funnel_i64' ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; SLM-NEXT: Cost Model: Found costs of RThru:19 CodeSize:14 Lat:24 SizeLat:22 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) ; SLM-NEXT: Cost Model: Found costs of RThru:38 CodeSize:28 Lat:48 SizeLat:44 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) ; SLM-NEXT: Cost Model: Found costs of RThru:76 CodeSize:56 Lat:96 SizeLat:88 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'constant_funnel_i64' ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; GLM-NEXT: Cost Model: Found costs of RThru:13 CodeSize:14 Lat:19 SizeLat:21 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) ; GLM-NEXT: Cost Model: Found costs of RThru:26 CodeSize:28 Lat:38 SizeLat:42 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) ; GLM-NEXT: Cost Model: Found costs of RThru:52 CodeSize:56 Lat:76 SizeLat:84 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'constant_funnel_i64' ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; XOP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:11 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) ; XOP-NEXT: Cost Model: Found costs of RThru:20 CodeSize:22 Lat:22 SizeLat:28 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) ; XOP-NEXT: Cost Model: Found costs of RThru:40 CodeSize:44 Lat:44 SizeLat:56 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'constant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ret void } define void @constant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSSE3-LABEL: 'constant_funnel_i32' ; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) ; SSSE3-NEXT: Cost Model: Found costs of RThru:23 CodeSize:28 Lat:25 SizeLat:32 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:46 CodeSize:55 Lat:49 SizeLat:63 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:92 CodeSize:109 Lat:97 SizeLat:125 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'constant_funnel_i32' ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) ; SSE42-NEXT: Cost Model: Found costs of RThru:23 CodeSize:20 Lat:33 SizeLat:25 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:46 CodeSize:39 Lat:65 SizeLat:49 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:92 CodeSize:77 Lat:129 SizeLat:97 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'constant_funnel_i32' ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) ; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:24 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:31 CodeSize:43 Lat:30 SizeLat:60 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:62 CodeSize:86 Lat:60 SizeLat:120 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'constant_funnel_i32' ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) ; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:11 SizeLat:11 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:6 Lat:13 SizeLat:14 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:12 Lat:26 SizeLat:28 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'constant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) ; AVX512F-NEXT: Cost Model: Found costs of 6 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:6 SizeLat:7 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) ; AVX512F-NEXT: Cost Model: Found costs of 6 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'constant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'constant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) ; AVX512DQ-NEXT: Cost Model: Found costs of 6 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:6 SizeLat:7 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) ; AVX512DQ-NEXT: Cost Model: Found costs of 6 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'constant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'constant_funnel_i32' ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) ; SLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:20 Lat:33 SizeLat:31 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) ; SLM-NEXT: Cost Model: Found costs of RThru:64 CodeSize:39 Lat:65 SizeLat:61 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) ; SLM-NEXT: Cost Model: Found costs of RThru:128 CodeSize:77 Lat:129 SizeLat:121 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'constant_funnel_i32' ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) ; GLM-NEXT: Cost Model: Found costs of RThru:23 CodeSize:20 Lat:33 SizeLat:25 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) ; GLM-NEXT: Cost Model: Found costs of RThru:46 CodeSize:39 Lat:65 SizeLat:49 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) ; GLM-NEXT: Cost Model: Found costs of RThru:92 CodeSize:77 Lat:129 SizeLat:97 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'constant_funnel_i32' ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) ; XOP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:11 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) ; XOP-NEXT: Cost Model: Found costs of RThru:20 CodeSize:22 Lat:22 SizeLat:28 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) ; XOP-NEXT: Cost Model: Found costs of RThru:40 CodeSize:44 Lat:44 SizeLat:56 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'constant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7) %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ret void } define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSSE3-LABEL: 'constant_funnel_i16' ; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; SSSE3-NEXT: Cost Model: Found costs of RThru:22 CodeSize:38 Lat:29 SizeLat:38 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:44 CodeSize:75 Lat:57 SizeLat:75 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:88 CodeSize:149 Lat:113 SizeLat:149 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'constant_funnel_i16' ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; SSE42-NEXT: Cost Model: Found costs of RThru:28 CodeSize:28 Lat:36 SizeLat:33 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:56 CodeSize:55 Lat:71 SizeLat:65 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:112 CodeSize:109 Lat:141 SizeLat:129 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'constant_funnel_i16' ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:19 Lat:26 SizeLat:28 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:44 CodeSize:50 Lat:46 SizeLat:71 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:88 CodeSize:100 Lat:92 SizeLat:142 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'constant_funnel_i16' ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; AVX2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:10 Lat:21 SizeLat:16 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:15 Lat:20 SizeLat:24 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:30 CodeSize:30 Lat:40 SizeLat:48 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'constant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; AVX512F-NEXT: Cost Model: Found costs of RThru:10 CodeSize:10 Lat:20 SizeLat:15 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:14 CodeSize:15 Lat:19 SizeLat:22 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:31 CodeSize:37 Lat:49 SizeLat:45 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'constant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'constant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:10 CodeSize:10 Lat:20 SizeLat:15 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:14 CodeSize:15 Lat:19 SizeLat:22 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:31 CodeSize:37 Lat:49 SizeLat:45 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'constant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'constant_funnel_i16' ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; SLM-NEXT: Cost Model: Found costs of RThru:31 CodeSize:28 Lat:38 SizeLat:34 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) ; SLM-NEXT: Cost Model: Found costs of RThru:62 CodeSize:55 Lat:75 SizeLat:67 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) ; SLM-NEXT: Cost Model: Found costs of RThru:124 CodeSize:109 Lat:149 SizeLat:133 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'constant_funnel_i16' ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:28 Lat:36 SizeLat:33 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) ; GLM-NEXT: Cost Model: Found costs of RThru:56 CodeSize:55 Lat:71 SizeLat:65 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) ; GLM-NEXT: Cost Model: Found costs of RThru:112 CodeSize:109 Lat:141 SizeLat:129 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'constant_funnel_i16' ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; XOP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:11 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) ; XOP-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:22 SizeLat:29 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) ; XOP-NEXT: Cost Model: Found costs of RThru:40 CodeSize:48 Lat:44 SizeLat:58 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'constant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ret void } define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSSE3-LABEL: 'constant_funnel_i8' ; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:59 Lat:54 SizeLat:64 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:117 Lat:107 SizeLat:127 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:128 CodeSize:233 Lat:213 SizeLat:253 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'constant_funnel_i8' ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:39 Lat:56 SizeLat:51 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:72 CodeSize:77 Lat:111 SizeLat:101 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:144 CodeSize:153 Lat:221 SizeLat:201 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'constant_funnel_i8' ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:27 Lat:53 SizeLat:40 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:57 CodeSize:71 Lat:53 SizeLat:100 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:114 CodeSize:142 Lat:106 SizeLat:200 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'constant_funnel_i8' ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; AVX2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:27 Lat:53 SizeLat:39 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:27 Lat:58 SizeLat:54 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:38 CodeSize:54 Lat:116 SizeLat:108 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'constant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; AVX512F-NEXT: Cost Model: Found costs of RThru:16 CodeSize:27 Lat:52 SizeLat:38 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:18 CodeSize:27 Lat:57 SizeLat:52 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:39 CodeSize:72 Lat:55 SizeLat:84 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'constant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:12 CodeSize:12 Lat:20 SizeLat:14 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:12 CodeSize:27 Lat:57 SizeLat:38 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:17 CodeSize:32 Lat:50 SizeLat:37 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'constant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:16 CodeSize:27 Lat:52 SizeLat:38 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:18 CodeSize:27 Lat:57 SizeLat:52 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:39 CodeSize:72 Lat:55 SizeLat:84 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'constant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:12 Lat:20 SizeLat:14 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:27 Lat:57 SizeLat:38 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:32 Lat:50 SizeLat:37 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'constant_funnel_i8' ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; SLM-NEXT: Cost Model: Found costs of RThru:38 CodeSize:39 Lat:58 SizeLat:52 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; SLM-NEXT: Cost Model: Found costs of RThru:76 CodeSize:77 Lat:115 SizeLat:103 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; SLM-NEXT: Cost Model: Found costs of RThru:152 CodeSize:153 Lat:229 SizeLat:205 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'constant_funnel_i8' ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; GLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:39 Lat:56 SizeLat:51 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; GLM-NEXT: Cost Model: Found costs of RThru:72 CodeSize:77 Lat:111 SizeLat:101 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; GLM-NEXT: Cost Model: Found costs of RThru:144 CodeSize:153 Lat:221 SizeLat:201 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'constant_funnel_i8' ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; XOP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:11 SizeLat:7 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; XOP-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:22 SizeLat:29 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; XOP-NEXT: Cost Model: Found costs of RThru:40 CodeSize:48 Lat:44 SizeLat:58 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'constant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:12 CodeSize:12 Lat:20 SizeLat:14 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:12 CodeSize:27 Lat:57 SizeLat:38 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:17 CodeSize:32 Lat:50 SizeLat:37 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ret void } ; ; Uniform Constant Funnel Shifts ; define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; SSSE3-NEXT: Cost Model: Found costs of RThru:11 CodeSize:12 Lat:11 SizeLat:13 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found costs of RThru:22 CodeSize:24 Lat:22 SizeLat:26 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found costs of RThru:44 CodeSize:48 Lat:44 SizeLat:52 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found costs of RThru:14 CodeSize:12 Lat:18 SizeLat:18 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found costs of RThru:28 CodeSize:24 Lat:36 SizeLat:36 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:10 SizeLat:8 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found costs of RThru:18 CodeSize:20 Lat:20 SizeLat:26 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found costs of RThru:36 CodeSize:40 Lat:40 SizeLat:52 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:8 SizeLat:8 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:9 SizeLat:12 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:18 SizeLat:24 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX512F-NEXT: Cost Model: Found costs of 6 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:6 SizeLat:7 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found costs of 6 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX512DQ-NEXT: Cost Model: Found costs of 6 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:6 SizeLat:7 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found costs of 6 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; SLM-NEXT: Cost Model: Found costs of RThru:13 CodeSize:6 Lat:14 SizeLat:10 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found costs of RThru:26 CodeSize:12 Lat:28 SizeLat:20 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found costs of RThru:52 CodeSize:24 Lat:56 SizeLat:40 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; GLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:12 Lat:18 SizeLat:18 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:24 Lat:36 SizeLat:36 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; XOP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found costs of RThru:18 CodeSize:20 Lat:20 SizeLat:26 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found costs of RThru:36 CodeSize:40 Lat:40 SizeLat:52 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ret void } define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i32' ; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; SSSE3-NEXT: Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:8 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) ; SSSE3-NEXT: Cost Model: Found costs of RThru:14 CodeSize:15 Lat:13 SizeLat:15 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) ; SSSE3-NEXT: Cost Model: Found costs of RThru:28 CodeSize:29 Lat:25 SizeLat:29 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i32' ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) ; SSE42-NEXT: Cost Model: Found costs of RThru:14 CodeSize:11 Lat:13 SizeLat:13 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) ; SSE42-NEXT: Cost Model: Found costs of RThru:28 CodeSize:21 Lat:25 SizeLat:25 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found costs of RThru:18 CodeSize:20 Lat:20 SizeLat:26 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found costs of RThru:36 CodeSize:40 Lat:40 SizeLat:52 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:9 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:18 SizeLat:24 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; AVX512F-NEXT: Cost Model: Found costs of 6 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:6 SizeLat:7 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found costs of 6 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; AVX512DQ-NEXT: Cost Model: Found costs of 6 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:6 SizeLat:7 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found costs of 6 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; SLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:11 Lat:13 SizeLat:13 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:21 Lat:25 SizeLat:25 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; GLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:11 Lat:13 SizeLat:13 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:21 Lat:25 SizeLat:25 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; XOP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found costs of RThru:18 CodeSize:20 Lat:20 SizeLat:26 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found costs of RThru:36 CodeSize:40 Lat:40 SizeLat:52 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ret void } define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i16' ; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; SSSE3-NEXT: Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:8 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) ; SSSE3-NEXT: Cost Model: Found costs of RThru:14 CodeSize:15 Lat:13 SizeLat:15 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) ; SSSE3-NEXT: Cost Model: Found costs of RThru:28 CodeSize:29 Lat:25 SizeLat:29 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i16' ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) ; SSE42-NEXT: Cost Model: Found costs of RThru:14 CodeSize:11 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) ; SSE42-NEXT: Cost Model: Found costs of RThru:28 CodeSize:21 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found costs of RThru:18 CodeSize:22 Lat:20 SizeLat:27 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found costs of RThru:36 CodeSize:44 Lat:40 SizeLat:54 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:9 SizeLat:12 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:18 SizeLat:24 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; AVX512F-NEXT: Cost Model: Found costs of 6 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:18 SizeLat:14 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:37 SizeLat:29 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found costs of 6 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; AVX512DQ-NEXT: Cost Model: Found costs of 6 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:18 SizeLat:14 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:37 SizeLat:29 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; SLM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:9 SizeLat:8 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found costs of RThru:18 CodeSize:11 Lat:17 SizeLat:15 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:21 Lat:33 SizeLat:29 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; GLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:11 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:21 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; XOP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found costs of RThru:18 CodeSize:22 Lat:20 SizeLat:27 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found costs of RThru:36 CodeSize:44 Lat:40 SizeLat:54 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ret void } define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i8' ; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; SSSE3-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:19 SizeLat:12 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) ; SSSE3-NEXT: Cost Model: Found costs of RThru:14 CodeSize:19 Lat:37 SizeLat:23 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) ; SSSE3-NEXT: Cost Model: Found costs of RThru:28 CodeSize:37 Lat:73 SizeLat:45 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i8' ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:8 Lat:19 SizeLat:11 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) ; SSE42-NEXT: Cost Model: Found costs of RThru:14 CodeSize:15 Lat:37 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) ; SSE42-NEXT: Cost Model: Found costs of RThru:28 CodeSize:29 Lat:73 SizeLat:41 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; AVX1-NEXT: Cost Model: Found costs of RThru:9 CodeSize:8 Lat:19 SizeLat:11 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:28 Lat:22 SizeLat:33 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found costs of RThru:40 CodeSize:56 Lat:44 SizeLat:66 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:8 Lat:21 SizeLat:11 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:8 Lat:21 SizeLat:16 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:16 Lat:42 SizeLat:32 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:20 SizeLat:10 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:20 SizeLat:14 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:13 CodeSize:25 Lat:41 SizeLat:27 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:18 SizeLat:10 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:20 SizeLat:10 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:20 SizeLat:10 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:20 SizeLat:10 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:20 SizeLat:14 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:13 CodeSize:25 Lat:41 SizeLat:27 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:18 SizeLat:10 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:20 SizeLat:10 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:20 SizeLat:10 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; SLM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:8 Lat:21 SizeLat:12 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found costs of RThru:18 CodeSize:15 Lat:41 SizeLat:23 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:29 Lat:81 SizeLat:45 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; GLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:8 Lat:19 SizeLat:11 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:15 Lat:37 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:29 Lat:73 SizeLat:41 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; XOP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:11 SizeLat:7 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:22 SizeLat:29 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found costs of RThru:40 CodeSize:48 Lat:44 SizeLat:58 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:5 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:16 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:16 SizeLat:8 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:16 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ret void } ; ; Variable Unary Funnel Shifts (Rotates) ; define void @var_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %c64, <2 x i64> %c128, <4 x i64> %c256, <8 x i64> %c512) { ; SSE-LABEL: 'var_rotate_i64' ; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 %c64) ; SSE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:13 Lat:16 SizeLat:18 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %c128) ; SSE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:26 Lat:32 SizeLat:36 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %c256) ; SSE-NEXT: Cost Model: Found costs of RThru:44 CodeSize:52 Lat:64 SizeLat:72 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %c512) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'var_rotate_i64' ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 %c64) ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:11 Lat:11 SizeLat:15 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %c128) ; AVX1-NEXT: Cost Model: Found costs of RThru:18 CodeSize:29 Lat:18 SizeLat:40 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %c256) ; AVX1-NEXT: Cost Model: Found costs of RThru:36 CodeSize:58 Lat:36 SizeLat:80 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %c512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'var_rotate_i64' ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 %c64) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:9 SizeLat:5 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %c128) ; AVX2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:11 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %c256) ; AVX2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:10 Lat:22 SizeLat:20 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %c512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'var_rotate_i64' ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 %c64) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %c128) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %c256) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %c512) ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'var_rotate_i64' ; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 %c64) ; SLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:13 Lat:16 SizeLat:18 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %c128) ; SLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:26 Lat:32 SizeLat:36 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %c256) ; SLM-NEXT: Cost Model: Found costs of RThru:56 CodeSize:52 Lat:64 SizeLat:72 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %c512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'var_rotate_i64' ; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 %c64) ; GLM-NEXT: Cost Model: Found costs of RThru:11 CodeSize:13 Lat:16 SizeLat:18 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %c128) ; GLM-NEXT: Cost Model: Found costs of RThru:22 CodeSize:26 Lat:32 SizeLat:36 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %c256) ; GLM-NEXT: Cost Model: Found costs of RThru:44 CodeSize:52 Lat:64 SizeLat:72 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %c512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'var_rotate_i64' ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 %c64) ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %c128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %c256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %c512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 %c64) %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %c128) %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %c256) %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %c512) ret void } define void @var_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %c32, <4 x i32> %c128, <8 x i32> %c256, <16 x i32> %c512) { ; SSSE3-LABEL: 'var_rotate_i32' ; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 %c32) ; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:28 Lat:34 SizeLat:34 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %c128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:55 Lat:67 SizeLat:67 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:128 CodeSize:109 Lat:133 SizeLat:133 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'var_rotate_i32' ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 %c32) ; SSE42-NEXT: Cost Model: Found costs of RThru:33 CodeSize:22 Lat:40 SizeLat:32 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %c128) ; SSE42-NEXT: Cost Model: Found costs of RThru:66 CodeSize:43 Lat:79 SizeLat:63 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256) ; SSE42-NEXT: Cost Model: Found costs of RThru:132 CodeSize:85 Lat:157 SizeLat:125 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'var_rotate_i32' ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 %c32) ; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:19 Lat:21 SizeLat:25 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %c128) ; AVX1-NEXT: Cost Model: Found costs of RThru:29 CodeSize:45 Lat:29 SizeLat:61 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256) ; AVX1-NEXT: Cost Model: Found costs of RThru:58 CodeSize:90 Lat:58 SizeLat:122 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'var_rotate_i32' ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 %c32) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %c128) ; AVX2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:11 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256) ; AVX2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:10 Lat:22 SizeLat:24 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'var_rotate_i32' ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 %c32) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %c128) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512) ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'var_rotate_i32' ; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 %c32) ; SLM-NEXT: Cost Model: Found costs of RThru:33 CodeSize:22 Lat:40 SizeLat:32 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %c128) ; SLM-NEXT: Cost Model: Found costs of RThru:66 CodeSize:43 Lat:79 SizeLat:63 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256) ; SLM-NEXT: Cost Model: Found costs of RThru:132 CodeSize:85 Lat:157 SizeLat:125 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'var_rotate_i32' ; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 %c32) ; GLM-NEXT: Cost Model: Found costs of RThru:33 CodeSize:22 Lat:40 SizeLat:32 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %c128) ; GLM-NEXT: Cost Model: Found costs of RThru:66 CodeSize:43 Lat:79 SizeLat:63 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256) ; GLM-NEXT: Cost Model: Found costs of RThru:132 CodeSize:85 Lat:157 SizeLat:125 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'var_rotate_i32' ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 %c32) ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %c128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 %c32) %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %c128) %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256) %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512) ret void } define void @var_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) { ; SSSE3-LABEL: 'var_rotate_i16' ; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) ; SSSE3-NEXT: Cost Model: Found costs of RThru:43 CodeSize:50 Lat:49 SizeLat:54 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:86 CodeSize:99 Lat:97 SizeLat:107 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:172 CodeSize:197 Lat:193 SizeLat:213 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'var_rotate_i16' ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) ; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:37 Lat:43 SizeLat:41 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) ; SSE42-NEXT: Cost Model: Found costs of RThru:72 CodeSize:73 Lat:85 SizeLat:81 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) ; SSE42-NEXT: Cost Model: Found costs of RThru:144 CodeSize:145 Lat:169 SizeLat:161 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'var_rotate_i16' ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) ; AVX1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:28 Lat:28 SizeLat:36 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) ; AVX1-NEXT: Cost Model: Found costs of RThru:47 CodeSize:62 Lat:50 SizeLat:83 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) ; AVX1-NEXT: Cost Model: Found costs of RThru:94 CodeSize:124 Lat:100 SizeLat:166 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'var_rotate_i16' ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) ; AVX2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:13 Lat:32 SizeLat:23 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) ; AVX2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:34 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) ; AVX2-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:68 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'var_rotate_i16' ; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) ; AVX512F-NEXT: Cost Model: Found costs of RThru:13 CodeSize:13 Lat:32 SizeLat:23 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:27 CodeSize:29 Lat:41 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'var_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:7 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'var_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:13 CodeSize:13 Lat:32 SizeLat:23 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:27 CodeSize:29 Lat:41 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'var_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'var_rotate_i16' ; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) ; SLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:37 Lat:43 SizeLat:41 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) ; SLM-NEXT: Cost Model: Found costs of RThru:72 CodeSize:73 Lat:85 SizeLat:81 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) ; SLM-NEXT: Cost Model: Found costs of RThru:144 CodeSize:145 Lat:169 SizeLat:161 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'var_rotate_i16' ; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) ; GLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:37 Lat:43 SizeLat:41 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) ; GLM-NEXT: Cost Model: Found costs of RThru:72 CodeSize:73 Lat:85 SizeLat:81 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) ; GLM-NEXT: Cost Model: Found costs of RThru:144 CodeSize:145 Lat:169 SizeLat:161 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'var_rotate_i16' ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'var_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ret void } define void @var_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { ; SSSE3-LABEL: 'var_rotate_i8' ; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) ; SSSE3-NEXT: Cost Model: Found costs of RThru:30 CodeSize:56 Lat:52 SizeLat:61 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:60 CodeSize:111 Lat:103 SizeLat:121 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:120 CodeSize:221 Lat:205 SizeLat:241 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'var_rotate_i8' ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) ; SSE42-NEXT: Cost Model: Found costs of RThru:34 CodeSize:38 Lat:54 SizeLat:49 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) ; SSE42-NEXT: Cost Model: Found costs of RThru:68 CodeSize:75 Lat:107 SizeLat:97 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) ; SSE42-NEXT: Cost Model: Found costs of RThru:136 CodeSize:149 Lat:213 SizeLat:193 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'var_rotate_i8' ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) ; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:26 Lat:51 SizeLat:38 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) ; AVX1-NEXT: Cost Model: Found costs of RThru:51 CodeSize:64 Lat:49 SizeLat:93 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) ; AVX1-NEXT: Cost Model: Found costs of RThru:102 CodeSize:128 Lat:98 SizeLat:186 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'var_rotate_i8' ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) ; AVX2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:26 Lat:51 SizeLat:37 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) ; AVX2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:26 Lat:56 SizeLat:52 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) ; AVX2-NEXT: Cost Model: Found costs of RThru:34 CodeSize:52 Lat:112 SizeLat:104 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'var_rotate_i8' ; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) ; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:26 Lat:51 SizeLat:37 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:17 CodeSize:26 Lat:56 SizeLat:50 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'var_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'var_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:15 CodeSize:26 Lat:51 SizeLat:37 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:17 CodeSize:26 Lat:56 SizeLat:50 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'var_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'var_rotate_i8' ; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) ; SLM-NEXT: Cost Model: Found costs of RThru:34 CodeSize:38 Lat:54 SizeLat:49 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) ; SLM-NEXT: Cost Model: Found costs of RThru:68 CodeSize:75 Lat:107 SizeLat:97 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) ; SLM-NEXT: Cost Model: Found costs of RThru:136 CodeSize:149 Lat:213 SizeLat:193 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'var_rotate_i8' ; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) ; GLM-NEXT: Cost Model: Found costs of RThru:34 CodeSize:38 Lat:54 SizeLat:49 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) ; GLM-NEXT: Cost Model: Found costs of RThru:68 CodeSize:75 Lat:107 SizeLat:97 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) ; GLM-NEXT: Cost Model: Found costs of RThru:136 CodeSize:149 Lat:213 SizeLat:193 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'var_rotate_i8' ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'var_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ret void } ; ; Uniform Variable Unary Funnel Shifts (Rotates) ; define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %c64, <2 x i64> %c128, <4 x i64> %c256, <8 x i64> %c512) { ; SSE-LABEL: 'splatvar_rotate_i64' ; SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:8 SizeLat:8 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; SSE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:16 SizeLat:16 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; SSE-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:18 SizeLat:22 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:30 Lat:36 SizeLat:44 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:11 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:22 SizeLat:20 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'splatvar_rotate_i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_rotate_i64' ; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:8 SizeLat:8 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:10 Lat:16 SizeLat:16 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:20 Lat:32 SizeLat:32 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_rotate_i64' ; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:8 SizeLat:8 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:16 SizeLat:16 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_rotate_i64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ret void } define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %c32, <4 x i32> %c128, <8 x i32> %c256, <16 x i32> %c512) { ; SSE-LABEL: 'splatvar_rotate_i32' ; SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; SSE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; SSE-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'splatvar_rotate_i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_rotate_i32' ; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_rotate_i32' ; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_rotate_i32' ; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ret void } define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) { ; SSE-LABEL: 'splatvar_rotate_i16' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; SSE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; SSE-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i16' ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i16' ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_rotate_i16' ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:7 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_rotate_i16' ; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_rotate_i16' ; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_rotate_i16' ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ret void } define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { ; SSE-LABEL: 'splatvar_rotate_i8' ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; SSE-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; SSE-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i8' ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:14 Lat:15 SizeLat:19 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:28 Lat:21 SizeLat:38 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:40 CodeSize:56 Lat:42 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i8' ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:23 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:32 Lat:36 SizeLat:46 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_rotate_i8' ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_rotate_i8' ; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_rotate_i8' ; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_rotate_i8' ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ret void } ; ; Constant Unary Funnel Shifts (Rotates) ; define void @constant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'constant_rotate_i64' ; SSE-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; SSE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:15 SizeLat:17 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) ; SSE-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:30 SizeLat:34 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) ; SSE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:48 Lat:60 SizeLat:68 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'constant_rotate_i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:14 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:17 CodeSize:28 Lat:17 SizeLat:38 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:34 CodeSize:56 Lat:34 SizeLat:76 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'constant_rotate_i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:8 SizeLat:4 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:10 SizeLat:8 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:8 Lat:20 SizeLat:16 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'constant_rotate_i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'constant_rotate_i64' ; SLM-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; SLM-NEXT: Cost Model: Found costs of RThru:13 CodeSize:12 Lat:15 SizeLat:17 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) ; SLM-NEXT: Cost Model: Found costs of RThru:26 CodeSize:24 Lat:30 SizeLat:34 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) ; SLM-NEXT: Cost Model: Found costs of RThru:52 CodeSize:48 Lat:60 SizeLat:68 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'constant_rotate_i64' ; GLM-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; GLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:15 SizeLat:17 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) ; GLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:30 SizeLat:34 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) ; GLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:48 Lat:60 SizeLat:68 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'constant_rotate_i64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) ret void } define void @constant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSSE3-LABEL: 'constant_rotate_i32' ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 7) ; SSSE3-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:22 SizeLat:28 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:40 CodeSize:47 Lat:43 SizeLat:55 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:80 CodeSize:93 Lat:85 SizeLat:109 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'constant_rotate_i32' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 7) ; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:30 SizeLat:22 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:40 CodeSize:35 Lat:59 SizeLat:43 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:80 CodeSize:69 Lat:117 SizeLat:85 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'constant_rotate_i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 7) ; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:15 Lat:14 SizeLat:21 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:37 Lat:25 SizeLat:52 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:74 Lat:50 SizeLat:104 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'constant_rotate_i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 7) ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:8 SizeLat:8 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:10 SizeLat:10 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:8 Lat:20 SizeLat:20 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'constant_rotate_i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 7) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'constant_rotate_i32' ; SLM-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 7) ; SLM-NEXT: Cost Model: Found costs of RThru:29 CodeSize:18 Lat:30 SizeLat:28 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) ; SLM-NEXT: Cost Model: Found costs of RThru:58 CodeSize:35 Lat:59 SizeLat:55 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) ; SLM-NEXT: Cost Model: Found costs of RThru:116 CodeSize:69 Lat:117 SizeLat:109 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'constant_rotate_i32' ; GLM-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 7) ; GLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:30 SizeLat:22 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) ; GLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:35 Lat:59 SizeLat:43 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) ; GLM-NEXT: Cost Model: Found costs of RThru:80 CodeSize:69 Lat:117 SizeLat:85 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'constant_rotate_i32' ; XOP-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 7) ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 7) %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ret void } define void @constant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSSE3-LABEL: 'constant_rotate_i16' ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) ; SSSE3-NEXT: Cost Model: Found costs of RThru:19 CodeSize:34 Lat:26 SizeLat:34 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:38 CodeSize:67 Lat:51 SizeLat:67 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:76 CodeSize:133 Lat:101 SizeLat:133 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'constant_rotate_i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:26 Lat:33 SizeLat:30 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:50 CodeSize:51 Lat:65 SizeLat:59 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:100 CodeSize:101 Lat:129 SizeLat:117 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'constant_rotate_i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) ; AVX1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:17 Lat:23 SizeLat:25 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:37 CodeSize:42 Lat:41 SizeLat:62 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:74 CodeSize:84 Lat:82 SizeLat:124 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'constant_rotate_i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:18 SizeLat:13 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:13 Lat:17 SizeLat:20 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:26 Lat:34 SizeLat:40 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'constant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) ; AVX512F-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:18 SizeLat:13 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:12 CodeSize:13 Lat:17 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:26 CodeSize:28 Lat:40 SizeLat:36 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'constant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:7 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'constant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:18 SizeLat:13 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:12 CodeSize:13 Lat:17 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:26 CodeSize:28 Lat:40 SizeLat:36 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'constant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'constant_rotate_i16' ; SLM-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) ; SLM-NEXT: Cost Model: Found costs of RThru:26 CodeSize:26 Lat:33 SizeLat:30 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) ; SLM-NEXT: Cost Model: Found costs of RThru:52 CodeSize:51 Lat:65 SizeLat:59 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) ; SLM-NEXT: Cost Model: Found costs of RThru:104 CodeSize:101 Lat:129 SizeLat:117 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'constant_rotate_i16' ; GLM-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) ; GLM-NEXT: Cost Model: Found costs of RThru:25 CodeSize:26 Lat:33 SizeLat:30 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) ; GLM-NEXT: Cost Model: Found costs of RThru:50 CodeSize:51 Lat:65 SizeLat:59 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) ; GLM-NEXT: Cost Model: Found costs of RThru:100 CodeSize:101 Lat:129 SizeLat:117 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'constant_rotate_i16' ; XOP-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'constant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ret void } define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSSE3-LABEL: 'constant_rotate_i8' ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) ; SSSE3-NEXT: Cost Model: Found costs of RThru:29 CodeSize:55 Lat:51 SizeLat:60 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:58 CodeSize:109 Lat:101 SizeLat:119 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:116 CodeSize:217 Lat:201 SizeLat:237 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'constant_rotate_i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) ; SSE42-NEXT: Cost Model: Found costs of RThru:33 CodeSize:37 Lat:53 SizeLat:48 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:66 CodeSize:73 Lat:105 SizeLat:95 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:132 CodeSize:145 Lat:209 SizeLat:189 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'constant_rotate_i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) ; AVX1-NEXT: Cost Model: Found costs of RThru:23 CodeSize:25 Lat:50 SizeLat:37 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:50 CodeSize:63 Lat:48 SizeLat:91 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:100 CodeSize:126 Lat:96 SizeLat:182 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'constant_rotate_i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:25 Lat:50 SizeLat:36 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:25 Lat:55 SizeLat:50 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:50 Lat:110 SizeLat:100 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'constant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) ; AVX512F-NEXT: Cost Model: Found costs of RThru:14 CodeSize:25 Lat:50 SizeLat:36 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:16 CodeSize:25 Lat:55 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:34 CodeSize:63 Lat:46 SizeLat:75 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'constant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'constant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:14 CodeSize:25 Lat:50 SizeLat:36 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:16 CodeSize:25 Lat:55 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:34 CodeSize:63 Lat:46 SizeLat:75 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'constant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'constant_rotate_i8' ; SLM-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) ; SLM-NEXT: Cost Model: Found costs of RThru:33 CodeSize:37 Lat:53 SizeLat:48 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; SLM-NEXT: Cost Model: Found costs of RThru:66 CodeSize:73 Lat:105 SizeLat:95 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; SLM-NEXT: Cost Model: Found costs of RThru:132 CodeSize:145 Lat:209 SizeLat:189 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'constant_rotate_i8' ; GLM-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) ; GLM-NEXT: Cost Model: Found costs of RThru:33 CodeSize:37 Lat:53 SizeLat:48 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; GLM-NEXT: Cost Model: Found costs of RThru:66 CodeSize:73 Lat:105 SizeLat:95 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; GLM-NEXT: Cost Model: Found costs of RThru:132 CodeSize:145 Lat:209 SizeLat:189 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'constant_rotate_i8' ; XOP-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'constant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ret void } ; ; Uniform Constant Unary Funnel Shifts (Rotates) ; define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:5 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:10 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:20 SizeLat:20 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:14 Lat:15 SizeLat:18 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:28 Lat:30 SizeLat:36 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; AVX2-NEXT: Cost Model: Found costs of 4 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:6 SizeLat:8 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:12 SizeLat:16 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; SLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:5 SizeLat:5 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:8 Lat:10 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:16 Lat:20 SizeLat:20 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:5 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:10 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:20 SizeLat:20 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) ret void } define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) ; SSE-NEXT: Cost Model: Found costs of 4 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:7 SizeLat:7 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:13 Lat:13 SizeLat:13 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:14 Lat:15 SizeLat:18 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:28 Lat:30 SizeLat:36 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) ; AVX2-NEXT: Cost Model: Found costs of 4 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:6 SizeLat:8 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:12 SizeLat:16 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) ; SLM-NEXT: Cost Model: Found costs of 4 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:7 SizeLat:7 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:13 Lat:13 SizeLat:13 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) ; GLM-NEXT: Cost Model: Found costs of 4 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:7 SizeLat:7 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:13 Lat:13 SizeLat:13 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ret void } define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; SSE-NEXT: Cost Model: Found costs of 4 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:7 SizeLat:7 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:13 Lat:13 SizeLat:13 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:14 Lat:15 SizeLat:18 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:28 Lat:30 SizeLat:36 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; AVX2-NEXT: Cost Model: Found costs of 4 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:6 SizeLat:8 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:12 SizeLat:16 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; AVX512F-NEXT: Cost Model: Found costs of 4 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:16 SizeLat:11 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:14 CodeSize:16 Lat:28 SizeLat:20 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:5 SizeLat:3 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:5 SizeLat:3 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:5 SizeLat:3 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; AVX512DQ-NEXT: Cost Model: Found costs of 4 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:16 SizeLat:11 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:14 CodeSize:16 Lat:28 SizeLat:20 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; SLM-NEXT: Cost Model: Found costs of 4 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:7 SizeLat:7 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:13 Lat:13 SizeLat:13 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; GLM-NEXT: Cost Model: Found costs of 4 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:7 SizeLat:7 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:13 Lat:13 SizeLat:13 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ret void } define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) ; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:16 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:11 Lat:31 SizeLat:15 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:21 Lat:61 SizeLat:29 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:16 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:20 Lat:17 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:40 Lat:34 SizeLat:48 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:18 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:18 SizeLat:12 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:12 Lat:36 SizeLat:24 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:18 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:18 SizeLat:11 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:32 SizeLat:18 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:8 SizeLat:4 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:9 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:9 SizeLat:4 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:18 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:18 SizeLat:11 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:32 SizeLat:18 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:8 SizeLat:4 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:9 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:9 SizeLat:4 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:16 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:11 Lat:31 SizeLat:15 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:21 Lat:61 SizeLat:29 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) ; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:16 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:11 Lat:31 SizeLat:15 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:21 Lat:61 SizeLat:29 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:6 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:6 SizeLat:2 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:6 SizeLat:2 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ret void } declare i64 @llvm.fshr.i64(i64, i64, i64) declare i32 @llvm.fshr.i32(i32, i32, i32) declare i16 @llvm.fshr.i16(i16, i16, i16) declare i8 @llvm.fshr.i8 (i8, i8, i8) declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>) declare <32 x i8> @llvm.fshr.v32i8(<32 x i8>, <32 x i8>, <32 x i8>) declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>) declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>) declare <64 x i8> @llvm.fshr.v64i8(<64 x i8>, <64 x i8>, <64 x i8>)