diff options
Diffstat (limited to 'llvm/test/Analysis')
42 files changed, 5871 insertions, 5157 deletions
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll index 9801d14..388362f 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll @@ -3,619 +3,619 @@ define void @fcmp_oeq(i32 %arg) { ; CHECK-LABEL: 'fcmp_oeq' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp oeq <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp oeq <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp oeq <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp oeq <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp oeq <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp oeq <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp oeq <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp oeq <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp oeq <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp oeq <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp oeq <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp oeq <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp oeq <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp oeq <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp oeq <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp oeq <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp oeq <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: 
Found costs of 2 for: %v16f16 = fcmp oeq <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp oeq <2 x float> undef, undef - %v4f32 = fcmp oeq <4 x float> undef, undef - %v8f32 = fcmp oeq <8 x float> undef, undef - %v2f64 = fcmp oeq <2 x double> undef, undef - %v4f64 = fcmp oeq <4 x double> undef, undef - %v2f16 = fcmp oeq <2 x half> undef, undef - %v4f16 = fcmp oeq <4 x half> undef, undef - %v8f16 = fcmp oeq <8 x half> undef, undef - %v16f16 = fcmp oeq <16 x half> undef, undef + %v2f32 = fcmp oeq <vscale x 2 x float> undef, undef + %v4f32 = fcmp oeq <vscale x 4 x float> undef, undef + %v8f32 = fcmp oeq <vscale x 8 x float> undef, undef + %v2f64 = fcmp oeq <vscale x 2 x double> undef, undef + %v4f64 = fcmp oeq <vscale x 4 x double> undef, undef + %v2f16 = fcmp oeq <vscale x 2 x half> undef, undef + %v4f16 = fcmp oeq <vscale x 4 x half> undef, undef + %v8f16 = fcmp oeq <vscale x 8 x half> undef, undef + %v16f16 = fcmp oeq <vscale x 16 x half> undef, undef ret void } define void @fcmp_oeq_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_oeq_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp oeq <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp oeq <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oeq <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp oeq <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp oeq <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp oeq <vscale x 8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp oeq <2 x bfloat> undef, undef - %v4bf16 = fcmp oeq <4 x bfloat> undef, undef - %v8bf16 = fcmp oeq <8 x bfloat> undef, undef 
+ %v2bf16 = fcmp oeq <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp oeq <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp oeq <vscale x 8 x bfloat> undef, undef ret void } define void @fcmp_ogt(i32 %arg) { ; CHECK-LABEL: 'fcmp_ogt' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ogt <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ogt <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ogt <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ogt <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ogt <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ogt <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ogt <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ogt <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ogt <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ogt <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ogt <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ogt <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ogt <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ogt <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ogt <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ogt <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ogt <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ogt <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost 
Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp ogt <2 x float> undef, undef - %v4f32 = fcmp ogt <4 x float> undef, undef - %v8f32 = fcmp ogt <8 x float> undef, undef - %v2f64 = fcmp ogt <2 x double> undef, undef - %v4f64 = fcmp ogt <4 x double> undef, undef - %v2f16 = fcmp ogt <2 x half> undef, undef - %v4f16 = fcmp ogt <4 x half> undef, undef - %v8f16 = fcmp ogt <8 x half> undef, undef - %v16f16 = fcmp ogt <16 x half> undef, undef + %v2f32 = fcmp ogt <vscale x 2 x float> undef, undef + %v4f32 = fcmp ogt <vscale x 4 x float> undef, undef + %v8f32 = fcmp ogt <vscale x 8 x float> undef, undef + %v2f64 = fcmp ogt <vscale x 2 x double> undef, undef + %v4f64 = fcmp ogt <vscale x 4 x double> undef, undef + %v2f16 = fcmp ogt <vscale x 2 x half> undef, undef + %v4f16 = fcmp ogt <vscale x 4 x half> undef, undef + %v8f16 = fcmp ogt <vscale x 8 x half> undef, undef + %v16f16 = fcmp ogt <vscale x 16 x half> undef, undef ret void } define void @fcmp_ogt_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ogt_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ogt <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ogt <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ogt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ogt <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ogt <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ogt <vscale x 8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp ogt <2 x bfloat> undef, undef - %v4bf16 = fcmp ogt <4 x bfloat> undef, undef - %v8bf16 = fcmp ogt <8 x bfloat> undef, undef + %v2bf16 = fcmp ogt <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp ogt <vscale x 4 x 
bfloat> undef, undef + %v8bf16 = fcmp ogt <vscale x 8 x bfloat> undef, undef ret void } define void @fcmp_oge(i32 %arg) { ; CHECK-LABEL: 'fcmp_oge' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp oge <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp oge <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp oge <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp oge <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp oge <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp oge <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp oge <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp oge <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp oge <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp oge <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp oge <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp oge <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp oge <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp oge <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp oge <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp oge <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp oge <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp oge <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp oge <2 
x float> undef, undef - %v4f32 = fcmp oge <4 x float> undef, undef - %v8f32 = fcmp oge <8 x float> undef, undef - %v2f64 = fcmp oge <2 x double> undef, undef - %v4f64 = fcmp oge <4 x double> undef, undef - %v2f16 = fcmp oge <2 x half> undef, undef - %v4f16 = fcmp oge <4 x half> undef, undef - %v8f16 = fcmp oge <8 x half> undef, undef - %v16f16 = fcmp oge <16 x half> undef, undef + %v2f32 = fcmp oge <vscale x 2 x float> undef, undef + %v4f32 = fcmp oge <vscale x 4 x float> undef, undef + %v8f32 = fcmp oge <vscale x 8 x float> undef, undef + %v2f64 = fcmp oge <vscale x 2 x double> undef, undef + %v4f64 = fcmp oge <vscale x 4 x double> undef, undef + %v2f16 = fcmp oge <vscale x 2 x half> undef, undef + %v4f16 = fcmp oge <vscale x 4 x half> undef, undef + %v8f16 = fcmp oge <vscale x 8 x half> undef, undef + %v16f16 = fcmp oge <vscale x 16 x half> undef, undef ret void } define void @fcmp_oge_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_oge_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp oge <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp oge <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oge <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp oge <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp oge <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp oge <vscale x 8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp oge <2 x bfloat> undef, undef - %v4bf16 = fcmp oge <4 x bfloat> undef, undef - %v8bf16 = fcmp oge <8 x bfloat> undef, undef + %v2bf16 = fcmp oge <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp oge <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp oge <vscale x 8 x bfloat> undef, undef ret void } define 
void @fcmp_olt(i32 %arg) { ; CHECK-LABEL: 'fcmp_olt' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp olt <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp olt <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp olt <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp olt <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp olt <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp olt <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp olt <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp olt <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp olt <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp olt <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp olt <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp olt <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp olt <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp olt <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp olt <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp olt <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp olt <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp olt <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp olt <2 x float> undef, undef - %v4f32 = fcmp olt <4 x float> undef, undef - %v8f32 = fcmp olt <8 x 
float> undef, undef - %v2f64 = fcmp olt <2 x double> undef, undef - %v4f64 = fcmp olt <4 x double> undef, undef - %v2f16 = fcmp olt <2 x half> undef, undef - %v4f16 = fcmp olt <4 x half> undef, undef - %v8f16 = fcmp olt <8 x half> undef, undef - %v16f16 = fcmp olt <16 x half> undef, undef + %v2f32 = fcmp olt <vscale x 2 x float> undef, undef + %v4f32 = fcmp olt <vscale x 4 x float> undef, undef + %v8f32 = fcmp olt <vscale x 8 x float> undef, undef + %v2f64 = fcmp olt <vscale x 2 x double> undef, undef + %v4f64 = fcmp olt <vscale x 4 x double> undef, undef + %v2f16 = fcmp olt <vscale x 2 x half> undef, undef + %v4f16 = fcmp olt <vscale x 4 x half> undef, undef + %v8f16 = fcmp olt <vscale x 8 x half> undef, undef + %v16f16 = fcmp olt <vscale x 16 x half> undef, undef ret void } define void @fcmp_olt_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_olt_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp olt <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp olt <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp olt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp olt <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp olt <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp olt <vscale x 8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp olt <2 x bfloat> undef, undef - %v4bf16 = fcmp olt <4 x bfloat> undef, undef - %v8bf16 = fcmp olt <8 x bfloat> undef, undef + %v2bf16 = fcmp olt <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp olt <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp olt <vscale x 8 x bfloat> undef, undef ret void } define void @fcmp_ole(i32 %arg) { ; CHECK-LABEL: 'fcmp_ole' -; CHECK-NEXT: Cost Model: Found costs of 
1 for: %v2f32 = fcmp ole <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ole <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ole <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ole <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ole <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ole <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ole <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ole <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ole <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ole <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ole <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ole <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ole <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ole <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ole <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ole <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ole <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ole <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp ole <2 x float> undef, undef - %v4f32 = fcmp ole <4 x float> undef, undef - %v8f32 = fcmp ole <8 x float> undef, undef - %v2f64 = fcmp ole <2 x double> undef, undef - %v4f64 = fcmp ole <4 x 
double> undef, undef - %v2f16 = fcmp ole <2 x half> undef, undef - %v4f16 = fcmp ole <4 x half> undef, undef - %v8f16 = fcmp ole <8 x half> undef, undef - %v16f16 = fcmp ole <16 x half> undef, undef + %v2f32 = fcmp ole <vscale x 2 x float> undef, undef + %v4f32 = fcmp ole <vscale x 4 x float> undef, undef + %v8f32 = fcmp ole <vscale x 8 x float> undef, undef + %v2f64 = fcmp ole <vscale x 2 x double> undef, undef + %v4f64 = fcmp ole <vscale x 4 x double> undef, undef + %v2f16 = fcmp ole <vscale x 2 x half> undef, undef + %v4f16 = fcmp ole <vscale x 4 x half> undef, undef + %v8f16 = fcmp ole <vscale x 8 x half> undef, undef + %v16f16 = fcmp ole <vscale x 16 x half> undef, undef ret void } define void @fcmp_ole_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ole_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ole <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ole <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ole <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ole <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ole <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ole <vscale x 8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp ole <2 x bfloat> undef, undef - %v4bf16 = fcmp ole <4 x bfloat> undef, undef - %v8bf16 = fcmp ole <8 x bfloat> undef, undef + %v2bf16 = fcmp ole <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp ole <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp ole <vscale x 8 x bfloat> undef, undef ret void } define void @fcmp_one(i32 %arg) { ; CHECK-LABEL: 'fcmp_one' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp one <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 
1 for: %v4f32 = fcmp one <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp one <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp one <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp one <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp one <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp one <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp one <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp one <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp one <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp one <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp one <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp one <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp one <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp one <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp one <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp one <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp one <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp one <2 x float> undef, undef - %v4f32 = fcmp one <4 x float> undef, undef - %v8f32 = fcmp one <8 x float> undef, undef - %v2f64 = fcmp one <2 x double> undef, undef - %v4f64 = fcmp one <4 x double> undef, undef - %v2f16 = fcmp one <2 x half> undef, undef - %v4f16 = fcmp one <4 x half> 
undef, undef - %v8f16 = fcmp one <8 x half> undef, undef - %v16f16 = fcmp one <16 x half> undef, undef + %v2f32 = fcmp one <vscale x 2 x float> undef, undef + %v4f32 = fcmp one <vscale x 4 x float> undef, undef + %v8f32 = fcmp one <vscale x 8 x float> undef, undef + %v2f64 = fcmp one <vscale x 2 x double> undef, undef + %v4f64 = fcmp one <vscale x 4 x double> undef, undef + %v2f16 = fcmp one <vscale x 2 x half> undef, undef + %v4f16 = fcmp one <vscale x 4 x half> undef, undef + %v8f16 = fcmp one <vscale x 8 x half> undef, undef + %v16f16 = fcmp one <vscale x 16 x half> undef, undef ret void } define void @fcmp_one_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_one_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp one <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp one <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp one <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp one <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp one <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp one <vscale x 8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp one <2 x bfloat> undef, undef - %v4bf16 = fcmp one <4 x bfloat> undef, undef - %v8bf16 = fcmp one <8 x bfloat> undef, undef + %v2bf16 = fcmp one <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp one <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp one <vscale x 8 x bfloat> undef, undef ret void } define void @fcmp_ord(i32 %arg) { ; CHECK-LABEL: 'fcmp_ord' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ord <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ord <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 
for: %v8f32 = fcmp ord <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ord <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ord <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ord <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ord <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ord <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ord <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ord <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ord <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ord <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ord <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ord <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ord <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ord <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ord <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ord <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp ord <2 x float> undef, undef - %v4f32 = fcmp ord <4 x float> undef, undef - %v8f32 = fcmp ord <8 x float> undef, undef - %v2f64 = fcmp ord <2 x double> undef, undef - %v4f64 = fcmp ord <4 x double> undef, undef - %v2f16 = fcmp ord <2 x half> undef, undef - %v4f16 = fcmp ord <4 x half> undef, undef - %v8f16 = fcmp ord <8 x half> undef, undef - %v16f16 = fcmp ord <16 x half> undef, 
undef + %v2f32 = fcmp ord <vscale x 2 x float> undef, undef + %v4f32 = fcmp ord <vscale x 4 x float> undef, undef + %v8f32 = fcmp ord <vscale x 8 x float> undef, undef + %v2f64 = fcmp ord <vscale x 2 x double> undef, undef + %v4f64 = fcmp ord <vscale x 4 x double> undef, undef + %v2f16 = fcmp ord <vscale x 2 x half> undef, undef + %v4f16 = fcmp ord <vscale x 4 x half> undef, undef + %v8f16 = fcmp ord <vscale x 8 x half> undef, undef + %v16f16 = fcmp ord <vscale x 16 x half> undef, undef ret void } define void @fcmp_ord_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ord_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ord <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ord <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ord <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ord <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ord <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ord <vscale x 8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp ord <2 x bfloat> undef, undef - %v4bf16 = fcmp ord <4 x bfloat> undef, undef - %v8bf16 = fcmp ord <8 x bfloat> undef, undef + %v2bf16 = fcmp ord <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp ord <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp ord <vscale x 8 x bfloat> undef, undef ret void } define void @fcmp_ueq(i32 %arg) { ; CHECK-LABEL: 'fcmp_ueq' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ueq <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ueq <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ueq <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: 
%v2f64 = fcmp ueq <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ueq <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ueq <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ueq <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ueq <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ueq <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ueq <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ueq <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ueq <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ueq <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ueq <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ueq <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ueq <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ueq <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ueq <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp ueq <2 x float> undef, undef - %v4f32 = fcmp ueq <4 x float> undef, undef - %v8f32 = fcmp ueq <8 x float> undef, undef - %v2f64 = fcmp ueq <2 x double> undef, undef - %v4f64 = fcmp ueq <4 x double> undef, undef - %v2f16 = fcmp ueq <2 x half> undef, undef - %v4f16 = fcmp ueq <4 x half> undef, undef - %v8f16 = fcmp ueq <8 x half> undef, undef - %v16f16 = fcmp ueq <16 x half> undef, undef + %v2f32 = fcmp ueq <vscale x 2 x float> undef, undef + %v4f32 = fcmp ueq <vscale x 4 x 
float> undef, undef + %v8f32 = fcmp ueq <vscale x 8 x float> undef, undef + %v2f64 = fcmp ueq <vscale x 2 x double> undef, undef + %v4f64 = fcmp ueq <vscale x 4 x double> undef, undef + %v2f16 = fcmp ueq <vscale x 2 x half> undef, undef + %v4f16 = fcmp ueq <vscale x 4 x half> undef, undef + %v8f16 = fcmp ueq <vscale x 8 x half> undef, undef + %v16f16 = fcmp ueq <vscale x 16 x half> undef, undef ret void } define void @fcmp_ueq_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ueq_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ueq <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ueq <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ueq <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ueq <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ueq <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ueq <vscale x 8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp ueq <2 x bfloat> undef, undef - %v4bf16 = fcmp ueq <4 x bfloat> undef, undef - %v8bf16 = fcmp ueq <8 x bfloat> undef, undef + %v2bf16 = fcmp ueq <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp ueq <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp ueq <vscale x 8 x bfloat> undef, undef ret void } define void @fcmp_ugt(i32 %arg) { ; CHECK-LABEL: 'fcmp_ugt' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ugt <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ugt <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ugt <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ugt <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: 
%v4f64 = fcmp ugt <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ugt <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ugt <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ugt <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ugt <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ugt <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ugt <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ugt <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ugt <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ugt <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ugt <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ugt <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ugt <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ugt <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp ugt <2 x float> undef, undef - %v4f32 = fcmp ugt <4 x float> undef, undef - %v8f32 = fcmp ugt <8 x float> undef, undef - %v2f64 = fcmp ugt <2 x double> undef, undef - %v4f64 = fcmp ugt <4 x double> undef, undef - %v2f16 = fcmp ugt <2 x half> undef, undef - %v4f16 = fcmp ugt <4 x half> undef, undef - %v8f16 = fcmp ugt <8 x half> undef, undef - %v16f16 = fcmp ugt <16 x half> undef, undef + %v2f32 = fcmp ugt <vscale x 2 x float> undef, undef + %v4f32 = fcmp ugt <vscale x 4 x float> undef, undef + %v8f32 = fcmp ugt <vscale x 8 x float> undef, undef + %v2f64 = fcmp ugt 
<vscale x 2 x double> undef, undef + %v4f64 = fcmp ugt <vscale x 4 x double> undef, undef + %v2f16 = fcmp ugt <vscale x 2 x half> undef, undef + %v4f16 = fcmp ugt <vscale x 4 x half> undef, undef + %v8f16 = fcmp ugt <vscale x 8 x half> undef, undef + %v16f16 = fcmp ugt <vscale x 16 x half> undef, undef ret void } define void @fcmp_ugt_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ugt_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ugt <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ugt <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ugt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ugt <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ugt <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ugt <vscale x 8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp ugt <2 x bfloat> undef, undef - %v4bf16 = fcmp ugt <4 x bfloat> undef, undef - %v8bf16 = fcmp ugt <8 x bfloat> undef, undef + %v2bf16 = fcmp ugt <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp ugt <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp ugt <vscale x 8 x bfloat> undef, undef ret void } define void @fcmp_uge(i32 %arg) { ; CHECK-LABEL: 'fcmp_uge' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp uge <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp uge <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp uge <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp uge <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp uge <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: 
%v2f16 = fcmp uge <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp uge <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp uge <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp uge <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp uge <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp uge <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp uge <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp uge <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp uge <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp uge <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp uge <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp uge <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp uge <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp uge <2 x float> undef, undef - %v4f32 = fcmp uge <4 x float> undef, undef - %v8f32 = fcmp uge <8 x float> undef, undef - %v2f64 = fcmp uge <2 x double> undef, undef - %v4f64 = fcmp uge <4 x double> undef, undef - %v2f16 = fcmp uge <2 x half> undef, undef - %v4f16 = fcmp uge <4 x half> undef, undef - %v8f16 = fcmp uge <8 x half> undef, undef - %v16f16 = fcmp uge <16 x half> undef, undef + %v2f32 = fcmp uge <vscale x 2 x float> undef, undef + %v4f32 = fcmp uge <vscale x 4 x float> undef, undef + %v8f32 = fcmp uge <vscale x 8 x float> undef, undef + %v2f64 = fcmp uge <vscale x 2 x double> undef, undef + %v4f64 = fcmp uge <vscale x 4 x double> undef, undef + 
%v2f16 = fcmp uge <vscale x 2 x half> undef, undef + %v4f16 = fcmp uge <vscale x 4 x half> undef, undef + %v8f16 = fcmp uge <vscale x 8 x half> undef, undef + %v16f16 = fcmp uge <vscale x 16 x half> undef, undef ret void } define void @fcmp_uge_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_uge_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp uge <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp uge <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uge <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp uge <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp uge <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp uge <vscale x 8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp uge <2 x bfloat> undef, undef - %v4bf16 = fcmp uge <4 x bfloat> undef, undef - %v8bf16 = fcmp uge <8 x bfloat> undef, undef + %v2bf16 = fcmp uge <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp uge <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp uge <vscale x 8 x bfloat> undef, undef ret void } define void @fcmp_ult(i32 %arg) { ; CHECK-LABEL: 'fcmp_ult' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ult <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ult <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ult <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ult <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ult <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ult <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: 
%v4f16 = fcmp ult <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ult <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ult <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ult <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ult <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ult <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ult <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ult <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ult <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ult <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ult <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ult <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp ult <2 x float> undef, undef - %v4f32 = fcmp ult <4 x float> undef, undef - %v8f32 = fcmp ult <8 x float> undef, undef - %v2f64 = fcmp ult <2 x double> undef, undef - %v4f64 = fcmp ult <4 x double> undef, undef - %v2f16 = fcmp ult <2 x half> undef, undef - %v4f16 = fcmp ult <4 x half> undef, undef - %v8f16 = fcmp ult <8 x half> undef, undef - %v16f16 = fcmp ult <16 x half> undef, undef + %v2f32 = fcmp ult <vscale x 2 x float> undef, undef + %v4f32 = fcmp ult <vscale x 4 x float> undef, undef + %v8f32 = fcmp ult <vscale x 8 x float> undef, undef + %v2f64 = fcmp ult <vscale x 2 x double> undef, undef + %v4f64 = fcmp ult <vscale x 4 x double> undef, undef + %v2f16 = fcmp ult <vscale x 2 x half> undef, undef + %v4f16 = fcmp ult <vscale x 4 x half> 
undef, undef + %v8f16 = fcmp ult <vscale x 8 x half> undef, undef + %v16f16 = fcmp ult <vscale x 16 x half> undef, undef ret void } define void @fcmp_ult_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ult_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ult <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ult <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ult <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ult <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ult <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ult <vscale x 8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp ult <2 x bfloat> undef, undef - %v4bf16 = fcmp ult <4 x bfloat> undef, undef - %v8bf16 = fcmp ult <8 x bfloat> undef, undef + %v2bf16 = fcmp ult <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp ult <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp ult <vscale x 8 x bfloat> undef, undef ret void } define void @fcmp_ule(i32 %arg) { ; CHECK-LABEL: 'fcmp_ule' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ule <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ule <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ule <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ule <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ule <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ule <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ule <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: 
%v8f16 = fcmp ule <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ule <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ule <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ule <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ule <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ule <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ule <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ule <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ule <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ule <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ule <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp ule <2 x float> undef, undef - %v4f32 = fcmp ule <4 x float> undef, undef - %v8f32 = fcmp ule <8 x float> undef, undef - %v2f64 = fcmp ule <2 x double> undef, undef - %v4f64 = fcmp ule <4 x double> undef, undef - %v2f16 = fcmp ule <2 x half> undef, undef - %v4f16 = fcmp ule <4 x half> undef, undef - %v8f16 = fcmp ule <8 x half> undef, undef - %v16f16 = fcmp ule <16 x half> undef, undef + %v2f32 = fcmp ule <vscale x 2 x float> undef, undef + %v4f32 = fcmp ule <vscale x 4 x float> undef, undef + %v8f32 = fcmp ule <vscale x 8 x float> undef, undef + %v2f64 = fcmp ule <vscale x 2 x double> undef, undef + %v4f64 = fcmp ule <vscale x 4 x double> undef, undef + %v2f16 = fcmp ule <vscale x 2 x half> undef, undef + %v4f16 = fcmp ule <vscale x 4 x half> undef, undef + %v8f16 = fcmp ule <vscale x 8 x half> undef, undef + %v16f16 = fcmp ule 
<vscale x 16 x half> undef, undef ret void } define void @fcmp_ule_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ule_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ule <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ule <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ule <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ule <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ule <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ule <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ule <vscale x 8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16bf16 = fcmp ule <vscale x 16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp ule <2 x bfloat> undef, undef - %v4bf16 = fcmp ule <4 x bfloat> undef, undef - %v8bf16 = fcmp ule <8 x bfloat> undef, undef - %v16bf16 = fcmp ule <16 x bfloat> undef, undef + %v2bf16 = fcmp ule <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp ule <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp ule <vscale x 8 x bfloat> undef, undef + %v16bf16 = fcmp ule <vscale x 16 x bfloat> undef, undef ret void } define void @fcmp_une(i32 %arg) { ; CHECK-LABEL: 'fcmp_une' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp une <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp une <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp une <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp une <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp une <4 x 
double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp une <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp une <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp une <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp une <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp une <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp une <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp une <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp une <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp une <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp une <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp une <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp une <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp une <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp une <2 x float> undef, undef - %v4f32 = fcmp une <4 x float> undef, undef - %v8f32 = fcmp une <8 x float> undef, undef - %v2f64 = fcmp une <2 x double> undef, undef - %v4f64 = fcmp une <4 x double> undef, undef - %v2f16 = fcmp une <2 x half> undef, undef - %v4f16 = fcmp une <4 x half> undef, undef - %v8f16 = fcmp une <8 x half> undef, undef - %v16f16 = fcmp une <16 x half> undef, undef + %v2f32 = fcmp une <vscale x 2 x float> undef, undef + %v4f32 = fcmp une <vscale x 4 x float> undef, undef + %v8f32 = fcmp une <vscale x 8 x float> undef, undef + %v2f64 = fcmp une <vscale x 2 x double> 
undef, undef + %v4f64 = fcmp une <vscale x 4 x double> undef, undef + %v2f16 = fcmp une <vscale x 2 x half> undef, undef + %v4f16 = fcmp une <vscale x 4 x half> undef, undef + %v8f16 = fcmp une <vscale x 8 x half> undef, undef + %v16f16 = fcmp une <vscale x 16 x half> undef, undef ret void } define void @fcmp_une_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_une_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp une <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp une <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp une <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp une <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp une <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp une <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp une <vscale x 8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16bf16 = fcmp une <vscale x 16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp une <2 x bfloat> undef, undef - %v4bf16 = fcmp une <4 x bfloat> undef, undef - %v8bf16 = fcmp une <8 x bfloat> undef, undef - %v16bf16 = fcmp une <16 x bfloat> undef, undef + %v2bf16 = fcmp une <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp une <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp une <vscale x 8 x bfloat> undef, undef + %v16bf16 = fcmp une <vscale x 16 x bfloat> undef, undef ret void } define void @fcmp_uno(i32 %arg) { ; CHECK-LABEL: 'fcmp_uno' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp uno <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp uno <4 x float> undef, undef -; 
CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp uno <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp uno <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp uno <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp uno <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp uno <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp uno <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp uno <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp uno <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp uno <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp uno <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp uno <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp uno <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp uno <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp uno <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp uno <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp uno <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp uno <2 x float> undef, undef - %v4f32 = fcmp uno <4 x float> undef, undef - %v8f32 = fcmp uno <8 x float> undef, undef - %v2f64 = fcmp uno <2 x double> undef, undef - %v4f64 = fcmp uno <4 x double> undef, undef - %v2f16 = fcmp uno <2 x half> undef, undef - %v4f16 = fcmp uno <4 x half> undef, undef - %v8f16 = fcmp uno <8 x half> undef, 
undef - %v16f16 = fcmp uno <16 x half> undef, undef + %v2f32 = fcmp uno <vscale x 2 x float> undef, undef + %v4f32 = fcmp uno <vscale x 4 x float> undef, undef + %v8f32 = fcmp uno <vscale x 8 x float> undef, undef + %v2f64 = fcmp uno <vscale x 2 x double> undef, undef + %v4f64 = fcmp uno <vscale x 4 x double> undef, undef + %v2f16 = fcmp uno <vscale x 2 x half> undef, undef + %v4f16 = fcmp uno <vscale x 4 x half> undef, undef + %v8f16 = fcmp uno <vscale x 8 x half> undef, undef + %v16f16 = fcmp uno <vscale x 16 x half> undef, undef ret void } define void @fcmp_uno_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_uno_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp uno <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp uno <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uno <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp uno <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp uno <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp uno <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp uno <vscale x 8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16bf16 = fcmp uno <vscale x 16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp uno <2 x bfloat> undef, undef - %v4bf16 = fcmp uno <4 x bfloat> undef, undef - %v8bf16 = fcmp uno <8 x bfloat> undef, undef - %v16bf16 = fcmp uno <16 x bfloat> undef, undef + %v2bf16 = fcmp uno <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp uno <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp uno <vscale x 8 x bfloat> undef, undef + %v16bf16 = fcmp uno <vscale x 16 x bfloat> undef, undef ret void 
} define void @fcmp_true(i32 %arg) { ; CHECK-LABEL: 'fcmp_true' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp true <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp true <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp true <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp true <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp true <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp true <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp true <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp true <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp true <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp true <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp true <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp true <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp true <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp true <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp true <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp true <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp true <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp true <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp true <2 x float> undef, undef - %v4f32 = fcmp true <4 x float> undef, 
undef - %v8f32 = fcmp true <8 x float> undef, undef - %v2f64 = fcmp true <2 x double> undef, undef - %v4f64 = fcmp true <4 x double> undef, undef - %v2f16 = fcmp true <2 x half> undef, undef - %v4f16 = fcmp true <4 x half> undef, undef - %v8f16 = fcmp true <8 x half> undef, undef - %v16f16 = fcmp true <16 x half> undef, undef + %v2f32 = fcmp true <vscale x 2 x float> undef, undef + %v4f32 = fcmp true <vscale x 4 x float> undef, undef + %v8f32 = fcmp true <vscale x 8 x float> undef, undef + %v2f64 = fcmp true <vscale x 2 x double> undef, undef + %v4f64 = fcmp true <vscale x 4 x double> undef, undef + %v2f16 = fcmp true <vscale x 2 x half> undef, undef + %v4f16 = fcmp true <vscale x 4 x half> undef, undef + %v8f16 = fcmp true <vscale x 8 x half> undef, undef + %v16f16 = fcmp true <vscale x 16 x half> undef, undef ret void } define void @fcmp_true_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_true_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp true <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp true <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp true <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp true <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp true <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp true <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp true <vscale x 8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16bf16 = fcmp true <vscale x 16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp true <2 x bfloat> undef, undef - %v4bf16 = fcmp true <4 x bfloat> undef, undef - %v8bf16 = fcmp true <8 x bfloat> 
undef, undef - %v16bf16 = fcmp true <16 x bfloat> undef, undef + %v2bf16 = fcmp true <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp true <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp true <vscale x 8 x bfloat> undef, undef + %v16bf16 = fcmp true <vscale x 16 x bfloat> undef, undef ret void } define void @fcmp_false(i32 %arg) { ; CHECK-LABEL: 'fcmp_false' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp false <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp false <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp false <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp false <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp false <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp false <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp false <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp false <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp false <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp false <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp false <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp false <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp false <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp false <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp false <vscale x 2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp false <vscale x 4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp 
false <vscale x 8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp false <vscale x 16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2f32 = fcmp false <2 x float> undef, undef - %v4f32 = fcmp false <4 x float> undef, undef - %v8f32 = fcmp false <8 x float> undef, undef - %v2f64 = fcmp false <2 x double> undef, undef - %v4f64 = fcmp false <4 x double> undef, undef - %v2f16 = fcmp false <2 x half> undef, undef - %v4f16 = fcmp false <4 x half> undef, undef - %v8f16 = fcmp false <8 x half> undef, undef - %v16f16 = fcmp false <16 x half> undef, undef + %v2f32 = fcmp false <vscale x 2 x float> undef, undef + %v4f32 = fcmp false <vscale x 4 x float> undef, undef + %v8f32 = fcmp false <vscale x 8 x float> undef, undef + %v2f64 = fcmp false <vscale x 2 x double> undef, undef + %v4f64 = fcmp false <vscale x 4 x double> undef, undef + %v2f16 = fcmp false <vscale x 2 x half> undef, undef + %v4f16 = fcmp false <vscale x 4 x half> undef, undef + %v8f16 = fcmp false <vscale x 8 x half> undef, undef + %v16f16 = fcmp false <vscale x 16 x half> undef, undef ret void } define void @fcmp_false_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_false_bfloat' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp false <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp false <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp false <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp false <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp false <vscale x 2 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp false <vscale x 4 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp false <vscale x 8 x 
bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16bf16 = fcmp false <vscale x 16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %v2bf16 = fcmp false <2 x bfloat> undef, undef - %v4bf16 = fcmp false <4 x bfloat> undef, undef - %v8bf16 = fcmp false <8 x bfloat> undef, undef - %v16bf16 = fcmp false <16 x bfloat> undef, undef + %v2bf16 = fcmp false <vscale x 2 x bfloat> undef, undef + %v4bf16 = fcmp false <vscale x 4 x bfloat> undef, undef + %v8bf16 = fcmp false <vscale x 8 x bfloat> undef, undef + %v16bf16 = fcmp false <vscale x 16 x bfloat> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll index 117315c..805b3713 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll @@ -31,3 +31,24 @@ define void @sve_fpext() { ret void } + +define void @sve_fpext_bf16() { +; CHECK-LABEL: 'sve_fpext_bf16' +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x double> +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x double> +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x 
bfloat> undef to <vscale x 8 x double> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float> + %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x float> + %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float> + + %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x double> + %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x double> + %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x double> + + ret void +} diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll index a17c6ce..bb31ebf 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s | FileCheck %s +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOBF16 +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve,+bf16 -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BF16 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" @@ -31,3 +32,27 @@ define void @sve_fptruncs() { ret void } + +define void @sve_fptruncs_bf16() { +; CHECK-LABEL: 'sve_fptruncs_bf16' +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat> +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid 
CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat> +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat> +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat> +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat> +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat> + %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat> + %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat> + + %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat> + %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat> + %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat> + + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-BF16: {{.*}} +; CHECK-NOBF16: {{.*}} diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index ee485e2..7e8d957 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -449,33 +449,33 @@ define void @vector_reverse() #0 { ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'vector_reverse' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2i64 = call <vscale x 2 x i64> 
@llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4bf16 = call <vscale x 4 x 
bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef) 
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4f64 = call 
<vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; diff --git a/llvm/test/Analysis/CostModel/ARM/arith.ll b/llvm/test/Analysis/CostModel/ARM/arith.ll index 8f17359..3e9b61b 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith.ll @@ -1,74 +1,61 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve,+mve1beat < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE1 -; RUN: opt 
-passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE2 -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve,+mve4beat < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE4 -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8r-none-eabi -mattr=+neon,+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK-V8R -; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=thumbv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE-SIZE +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve,+mve1beat < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE1 +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE2 +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve,+mve4beat < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE4 +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8r-none-eabi -mattr=+neon,+fp-armv8 < %s | 
FileCheck %s --check-prefix=CHECK-V8R target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define void @i1() { ; CHECK-LABEL: 'i1' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i1 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i1 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i1 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i1 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i1 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i1 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i1 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i1 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i1 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NEXT: Cost Model: Found costs of 1 for: %c = add i1 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %d = sub i1 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %e = mul i1 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %f = ashr i1 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %g = lshr i1 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %h = shl i1 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %i = and i1 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %j = or i1 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %k = xor i1 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-V8M-MAIN-LABEL: 
'i1' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i1 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i1 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i1 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i1 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i1 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i1 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i1 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i1 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i1 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %c = add i1 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %d = sub i1 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %e = mul i1 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %f = ashr i1 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %g = lshr i1 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %h = shl i1 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %i = and i1 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %j = or i1 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %k = xor i1 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8M-BASE-LABEL: 
'i1' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i1 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i1 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i1 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i1 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i1 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i1 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i1 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i1 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i1 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %c = add i1 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %d = sub i1 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %e = mul i1 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %f = ashr i1 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %g = lshr i1 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %h = shl i1 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %i = and i1 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %j = or i1 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %k = xor i1 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8R-LABEL: 'i1' 
-; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i1 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i1 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i1 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i1 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i1 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i1 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i1 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i1 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i1 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'i1' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i = and i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %j = or i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k = xor i1 undef, 
undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c = add i1 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d = sub i1 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %e = mul i1 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %f = ashr i1 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %g = lshr i1 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %h = shl i1 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i = and i1 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %j = or i1 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k = xor i1 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c = add i1 undef, undef %d = sub i1 undef, undef @@ -84,64 +71,52 @@ define void @i1() { define void @i8() { ; CHECK-LABEL: 'i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; 
CHECK-NEXT: Cost Model: Found costs of 1 for: %c = add i8 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %d = sub i8 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %e = mul i8 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %f = ashr i8 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %g = lshr i8 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %h = shl i8 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %i = and i8 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %j = or i8 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %k = xor i8 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-V8M-MAIN-LABEL: 'i8' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i8 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i8 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i8 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i8 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i8 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %c = add i8 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %d = sub i8 
undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %e = mul i8 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %f = ashr i8 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %g = lshr i8 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %h = shl i8 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %i = and i8 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %j = or i8 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %k = xor i8 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8M-BASE-LABEL: 'i8' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i8 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i8 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i8 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i8 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i8 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %c = add i8 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %d = sub i8 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %e = mul i8 undef, undef +; 
CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %f = ashr i8 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %g = lshr i8 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %h = shl i8 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %i = and i8 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %j = or i8 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %k = xor i8 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8R-LABEL: 'i8' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i8 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i8 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i8 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i8 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i8 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'i8' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i8 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i8 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %f = ashr i8 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i8 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i8 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c = add i8 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d = sub i8 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %e = mul i8 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %f = ashr i8 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %g = lshr i8 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %h = shl i8 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i = and i8 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %j = or i8 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k = xor i8 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c = add i8 undef, undef %d = sub i8 undef, undef @@ -157,64 +132,52 @@ define void @i8() { define void @i16() { ; CHECK-LABEL: 'i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = 
ashr i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NEXT: Cost Model: Found costs of 1 for: %c = add i16 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %d = sub i16 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %e = mul i16 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %f = ashr i16 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %g = lshr i16 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %h = shl i16 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %i = and i16 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %j = or i16 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %k = xor i16 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-V8M-MAIN-LABEL: 'i16' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i16 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i16 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i16 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i16 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %h = shl i16 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %c = add i16 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %d = sub i16 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %e = mul i16 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %f = ashr i16 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %g = lshr i16 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %h = shl i16 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %i = and i16 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %j = or i16 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %k = xor i16 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8M-BASE-LABEL: 'i16' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i16 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i16 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i16 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i16 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i16 undef, undef -; CHECK-V8M-BASE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %c = add i16 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %d = sub i16 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %e = mul i16 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %f = ashr i16 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %g = lshr i16 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %h = shl i16 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %i = and i16 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %j = or i16 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %k = xor i16 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8R-LABEL: 'i16' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i16 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i16 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i16 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i16 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i16 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %j = or i16 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'i16' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i16 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i16 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i16 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i16 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i16 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c = add i16 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d = sub i16 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %e = mul i16 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %f = ashr i16 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %g = lshr i16 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %h = shl i16 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i = and i16 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %j = or i16 undef, undef +; 
CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k = xor i16 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c = add i16 undef, undef %d = sub i16 undef, undef @@ -230,64 +193,52 @@ define void @i16() { define void @i32() { ; CHECK-LABEL: 'i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NEXT: Cost Model: Found costs of 1 for: %c = add i32 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %d = sub i32 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %e = mul i32 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %f = ashr i32 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %g = lshr i32 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %h = shl i32 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %i = and i32 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %j = or i32 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 1 for: %k = xor i32 undef, undef +; CHECK-NEXT: Cost Model: Found costs 
of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-V8M-MAIN-LABEL: 'i32' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i32 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i32 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i32 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i32 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i32 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %c = add i32 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %d = sub i32 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %e = mul i32 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %f = ashr i32 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %g = lshr i32 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %h = shl i32 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %i = and i32 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %j = or i32 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: %k = xor i32 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8M-BASE-LABEL: 
'i32' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i32 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i32 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i32 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i32 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i32 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %c = add i32 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %d = sub i32 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %e = mul i32 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %f = ashr i32 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %g = lshr i32 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %h = shl i32 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %i = and i32 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %j = or i32 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: %k = xor i32 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8R-LABEL: 'i32' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%c = add i32 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i32 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i32 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i32 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i32 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i32 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'i32' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = sub i32 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = mul i32 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i32 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i32 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i32 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret void +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c = add i32 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d = sub i32 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %e = mul i32 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %f = ashr i32 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %g = lshr i32 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %h = shl i32 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i = and i32 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %j = or i32 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k = xor i32 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c = add i32 undef, undef %d = sub i32 undef, undef @@ -303,64 +254,52 @@ define void @i32() { define void @i64() { ; CHECK-LABEL: 'i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c = add i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d = sub i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = mul i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = ashr i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g = lshr i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h = shl i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NEXT: Cost Model: Found costs of 2 
for: %c = add i64 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %d = sub i64 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %e = mul i64 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %f = ashr i64 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %g = lshr i64 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %h = shl i64 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %i = and i64 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %j = or i64 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %k = xor i64 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-V8M-MAIN-LABEL: 'i64' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c = add i64 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d = sub i64 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = mul i64 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = ashr i64 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g = lshr i64 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h = shl i64 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %c = add i64 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %d = sub i64 undef, undef +; 
CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %e = mul i64 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %f = ashr i64 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %g = lshr i64 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %h = shl i64 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %i = and i64 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %j = or i64 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %k = xor i64 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8M-BASE-LABEL: 'i64' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c = add i64 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d = sub i64 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = mul i64 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = ashr i64 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g = lshr i64 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h = shl i64 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %c = add i64 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %d = sub i64 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %e = mul i64 undef, 
undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %f = ashr i64 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %g = lshr i64 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %h = shl i64 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %i = and i64 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %j = or i64 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %k = xor i64 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8R-LABEL: 'i64' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c = add i64 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d = sub i64 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = mul i64 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = ashr i64 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g = lshr i64 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h = shl i64 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'i64' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c = add i64 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d = sub i64 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = mul i64 undef, undef -; 
CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = ashr i64 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g = lshr i64 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h = shl i64 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c = add i64 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %d = sub i64 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %e = mul i64 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f = ashr i64 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g = lshr i64 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h = shl i64 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %i = and i64 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %j = or i64 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %k = xor i64 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c = add i64 undef, undef %d = sub i64 undef, undef @@ -377,277 +316,238 @@ define void @i64() { define void @vi8() { ; CHECK-MVE1-LABEL: 'vi8' -; 
CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = and <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j2 = or <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k2 = xor <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c4 = add <4 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d4 = sub <4 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e4 = mul <4 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f4 = ashr <4 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g4 = lshr <4 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h4 = shl <4 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i8> undef, undef -; 
CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c8 = add <8 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d8 = sub <8 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e8 = mul <8 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f8 = ashr <8 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g8 = lshr <8 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h8 = shl <8 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c16 = add <16 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d16 = sub <16 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e16 = mul <16 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = ashr <16 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g16 = lshr <16 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h16 = shl <16 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i8> undef, undef -; 
CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %c2 = add <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %d2 = sub <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %e2 = mul <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %f2 = ashr <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %g2 = lshr <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %h2 = shl <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %i2 = and <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %j2 = or <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %k2 = xor <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %c4 = add <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %d4 = sub <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %e4 = mul <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %f4 = ashr <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %g4 = lshr <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %h4 = shl <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %i4 = and <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %j4 = or <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 
for: %k4 = xor <4 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %c8 = add <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %d8 = sub <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %e8 = mul <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %f8 = ashr <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %g8 = lshr <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %h8 = shl <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %i8 = and <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %j8 = or <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %k8 = xor <8 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %c16 = add <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %d16 = sub <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %e16 = mul <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %f16 = ashr <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %g16 = lshr <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %h16 = shl <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %i16 = and <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found 
costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %j16 = or <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %k16 = xor <16 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE2-LABEL: 'vi8' -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c4 = add <4 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d4 = sub <4 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e4 = mul <4 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f4 = ashr <4 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g4 = lshr <4 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h4 = shl <4 x i8> undef, undef -; 
CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c8 = add <8 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d8 = sub <8 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e8 = mul <8 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f8 = ashr <8 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g8 = lshr <8 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h8 = shl <8 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = and <8 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j8 = or <8 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k8 = xor <8 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c16 = add <16 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d16 = sub <16 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e16 = mul <16 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = ashr <16 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g16 = lshr <16 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h16 = shl <16 x i8> undef, undef -; 
CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %c2 = add <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %d2 = sub <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %e2 = mul <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %f2 = ashr <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %g2 = lshr <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %h2 = shl <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %i2 = and <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %j2 = or <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %k2 = xor <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %c4 = add <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %d4 = sub <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e4 = mul <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %f4 = ashr <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %g4 = lshr <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %h4 = shl <4 x 
i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %i4 = and <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %j4 = or <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %k4 = xor <4 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %c8 = add <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %d8 = sub <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e8 = mul <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %f8 = ashr <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %g8 = lshr <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %h8 = shl <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %i8 = and <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %j8 = or <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %k8 = xor <8 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %c16 = add <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %d16 = sub <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e16 = mul <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %f16 = ashr <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 
Lat:2 SizeLat:2 for: %g16 = lshr <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %h16 = shl <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %i16 = and <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %j16 = or <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %k16 = xor <16 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE4-LABEL: 'vi8' -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i8> undef, undef -; 
CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c16 = add <16 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d16 = sub <16 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e16 = mul <16 x i8> undef, undef -; 
CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = ashr <16 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g16 = lshr <16 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h16 = shl <16 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i8> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %c2 = add <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %d2 = sub <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %e2 = mul <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %f2 = ashr <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %g2 = lshr <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %h2 = shl <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %i2 = and <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %j2 = or <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %k2 = xor <2 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %c4 = add <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %d4 = sub <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %e4 = mul <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %f4 = ashr <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %g4 = lshr <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost 
Model: Found costs of 1 for: %h4 = shl <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %i4 = and <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %j4 = or <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %k4 = xor <4 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %c8 = add <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %d8 = sub <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %e8 = mul <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %f8 = ashr <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %g8 = lshr <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %h8 = shl <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %i8 = and <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %j8 = or <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %k8 = xor <8 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %c16 = add <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %d16 = sub <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %e16 = mul <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %f16 = ashr <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %g16 = lshr <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %h16 = shl <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %i16 = and <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %j16 = or <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %k16 = xor <16 x i8> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; 
CHECK-V8M-MAIN-LABEL: 'vi8' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d2 = sub <2 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e2 = mul <2 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f2 = ashr <2 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g2 = lshr <2 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h2 = shl <2 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c4 = add <4 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d4 = sub <4 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e4 = mul <4 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f4 = ashr <4 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g4 = lshr <4 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h4 = shl <4 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c8 = add <8 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d8 = sub <8 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e8 = mul <8 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f8 = ashr <8 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g8 = lshr <8 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h8 = shl <8 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i8 = and <8 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j8 = or <8 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k8 = xor <8 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c16 = add <16 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d16 = sub <16 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e16 = mul <16 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f16 = ashr <16 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %g16 = lshr <16 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %h16 = shl <16 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 
for instruction: %j16 = or <16 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %c2 = add <2 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %d2 = sub <2 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %e2 = mul <2 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %f2 = ashr <2 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %g2 = lshr <2 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %h2 = shl <2 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %i2 = and <2 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %j2 = or <2 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %k2 = xor <2 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %c4 = add <4 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %d4 = sub <4 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %e4 = mul <4 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %f4 = ashr <4 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %g4 = lshr <4 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %h4 = shl <4 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %i4 = and <4 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %j4 = or <4 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %k4 = xor <4 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %c8 = add <8 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost 
Model: Found costs of 8 for: %d8 = sub <8 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %e8 = mul <8 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %f8 = ashr <8 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %g8 = lshr <8 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %h8 = shl <8 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %i8 = and <8 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %j8 = or <8 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %k8 = xor <8 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %c16 = add <16 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %d16 = sub <16 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %e16 = mul <16 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %f16 = ashr <16 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %g16 = lshr <16 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %h16 = shl <16 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %i16 = and <16 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %j16 = or <16 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %k16 = xor <16 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi8' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d2 = sub <2 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e2 = mul <2 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %f2 = ashr <2 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g2 = lshr <2 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h2 = shl <2 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c4 = add <4 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d4 = sub <4 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e4 = mul <4 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f4 = ashr <4 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g4 = lshr <4 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h4 = shl <4 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c8 = add <8 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d8 = sub <8 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e8 = 
mul <8 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f8 = ashr <8 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g8 = lshr <8 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h8 = shl <8 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i8 = and <8 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j8 = or <8 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k8 = xor <8 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c16 = add <16 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d16 = sub <16 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e16 = mul <16 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f16 = ashr <16 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %g16 = lshr <16 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %h16 = shl <16 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %c2 = add <2 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 
for: %d2 = sub <2 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %e2 = mul <2 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %f2 = ashr <2 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %g2 = lshr <2 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %h2 = shl <2 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %i2 = and <2 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %j2 = or <2 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %k2 = xor <2 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %c4 = add <4 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %d4 = sub <4 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %e4 = mul <4 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %f4 = ashr <4 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %g4 = lshr <4 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %h4 = shl <4 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %i4 = and <4 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %j4 = or <4 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %k4 = xor <4 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %c8 = add <8 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %d8 = sub <8 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %e8 = mul <8 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %f8 = ashr <8 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %g8 = lshr <8 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %h8 = shl 
<8 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %i8 = and <8 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %j8 = or <8 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %k8 = xor <8 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %c16 = add <16 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %d16 = sub <16 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %e16 = mul <16 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %f16 = ashr <16 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %g16 = lshr <16 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %h16 = shl <16 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %i16 = and <16 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %j16 = or <16 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %k16 = xor <16 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8R-LABEL: 'vi8' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f2 = ashr <2 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g2 = lshr <2 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h2 = shl <2 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i8> undef, undef -; 
CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f4 = ashr <4 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g4 = lshr <4 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h4 = shl <4 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f8 = ashr <8 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g8 = lshr <8 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h8 = shl <8 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %j8 = or <8 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c16 = add <16 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d16 = sub <16 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e16 = mul <16 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = ashr <16 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g16 = lshr <16 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h16 = shl <16 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'vi8' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i8> undef, undef -; 
CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %h8 = shl <8 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c16 = add <16 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d16 = sub <16 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e16 = mul <16 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = ashr <16 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g16 = lshr <16 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h16 = shl <16 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c2 = add <2 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d2 = sub <2 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %e2 = mul <2 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f2 = ashr <2 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g2 = lshr 
<2 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h2 = shl <2 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i2 = and <2 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %j2 = or <2 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k2 = xor <2 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c4 = add <4 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d4 = sub <4 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %e4 = mul <4 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f4 = ashr <4 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g4 = lshr <4 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h4 = shl <4 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i4 = and <4 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %j4 = or <4 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k4 = xor <4 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c8 = add <8 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d8 = sub <8 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %e8 = mul <8 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f8 = ashr <8 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g8 = lshr <8 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h8 = shl <8 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i8 = and <8 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs 
of 1 for: %j8 = or <8 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k8 = xor <8 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c16 = add <16 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d16 = sub <16 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %e16 = mul <16 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f16 = ashr <16 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g16 = lshr <16 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h16 = shl <16 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i16 = and <16 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %j16 = or <16 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k16 = xor <16 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c2 = add <2 x i8> undef, undef %d2 = sub <2 x i8> undef, undef @@ -690,277 +590,238 @@ define void @vi8() { define void @vi16() { ; CHECK-MVE1-LABEL: 'vi16' -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 
for instruction: %i2 = and <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j2 = or <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k2 = xor <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c4 = add <4 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d4 = sub <4 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e4 = mul <4 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f4 = ashr <4 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g4 = lshr <4 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h4 = shl <4 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c8 = add <8 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d8 = sub <8 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e8 = mul <8 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f8 = ashr <8 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g8 = lshr <8 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h8 = shl <8 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 
for instruction: %i8 = and <8 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c16 = add <16 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d16 = sub <16 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e16 = mul <16 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f16 = ashr <16 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g16 = lshr <16 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h16 = shl <16 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %c2 = add <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %d2 = sub <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %e2 = mul <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %f2 = ashr <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %g2 = lshr <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %h2 = shl <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %i2 = 
and <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %j2 = or <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %k2 = xor <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %c4 = add <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %d4 = sub <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %e4 = mul <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %f4 = ashr <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %g4 = lshr <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %h4 = shl <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %i4 = and <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %j4 = or <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %k4 = xor <4 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %c8 = add <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %d8 = sub <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %e8 = mul <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %f8 = ashr <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %g8 = lshr <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 
RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %h8 = shl <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %i8 = and <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %j8 = or <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %k8 = xor <8 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %c16 = add <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %d16 = sub <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %e16 = mul <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %f16 = ashr <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %g16 = lshr <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %h16 = shl <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %i16 = and <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %j16 = or <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %k16 = xor <16 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE2-LABEL: 'vi16' -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i16> undef, undef -; 
CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c4 = add <4 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d4 = sub <4 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e4 = mul <4 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f4 = ashr <4 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g4 = lshr <4 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h4 = shl <4 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c8 = add <8 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d8 = sub <8 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e8 = mul <8 x i16> undef, undef 
-; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f8 = ashr <8 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g8 = lshr <8 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h8 = shl <8 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = and <8 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j8 = or <8 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k8 = xor <8 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c16 = add <16 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d16 = sub <16 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e16 = mul <16 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = ashr <16 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g16 = lshr <16 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h16 = shl <16 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %c2 = add <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %d2 = sub <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 
for: %e2 = mul <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %f2 = ashr <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %g2 = lshr <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %h2 = shl <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %i2 = and <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %j2 = or <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %k2 = xor <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %c4 = add <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %d4 = sub <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e4 = mul <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %f4 = ashr <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %g4 = lshr <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %h4 = shl <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %i4 = and <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %j4 = or <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %k4 = xor <4 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %c8 = add <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %d8 = sub <8 x i16> undef, undef +; 
CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e8 = mul <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %f8 = ashr <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %g8 = lshr <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %h8 = shl <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %i8 = and <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %j8 = or <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %k8 = xor <8 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %c16 = add <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %d16 = sub <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %e16 = mul <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %f16 = ashr <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %g16 = lshr <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %h16 = shl <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %i16 = and <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %j16 = or <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %k16 = xor <16 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:0 
CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE4-LABEL: 'vi16' -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %k4 = xor <4 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c16 = add <16 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d16 = sub <16 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e16 = mul <16 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = ashr <16 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g16 = lshr <16 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h16 = shl <16 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i16> undef, undef -; 
CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i16> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %c2 = add <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %d2 = sub <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %e2 = mul <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %f2 = ashr <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %g2 = lshr <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %h2 = shl <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %i2 = and <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %j2 = or <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %k2 = xor <2 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %c4 = add <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %d4 = sub <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %e4 = mul <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %f4 = ashr <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %g4 = lshr <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %h4 = shl <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %i4 = and <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %j4 = or <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %k4 = xor <4 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %c8 = add <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %d8 = sub <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: 
%e8 = mul <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %f8 = ashr <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %g8 = lshr <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %h8 = shl <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %i8 = and <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %j8 = or <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %k8 = xor <8 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %c16 = add <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %d16 = sub <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %e16 = mul <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %f16 = ashr <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %g16 = lshr <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %h16 = shl <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %i16 = and <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %j16 = or <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %k16 = xor <16 x i16> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi16' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d2 = sub <2 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e2 = mul <2 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f2 = ashr <2 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %g2 = lshr <2 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h2 = shl <2 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c4 = add <4 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d4 = sub <4 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e4 = mul <4 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f4 = ashr <4 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g4 = lshr <4 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h4 = shl <4 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c8 = add <8 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d8 = sub <8 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e8 = mul <8 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f8 = 
ashr <8 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g8 = lshr <8 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h8 = shl <8 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i8 = and <8 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j8 = or <8 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k8 = xor <8 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c16 = add <16 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d16 = sub <16 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e16 = mul <16 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f16 = ashr <16 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %g16 = lshr <16 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %h16 = shl <16 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %c2 = add <2 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %d2 = sub <2 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: 
%e2 = mul <2 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %f2 = ashr <2 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %g2 = lshr <2 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %h2 = shl <2 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %i2 = and <2 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %j2 = or <2 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %k2 = xor <2 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %c4 = add <4 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %d4 = sub <4 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %e4 = mul <4 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %f4 = ashr <4 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %g4 = lshr <4 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %h4 = shl <4 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %i4 = and <4 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %j4 = or <4 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %k4 = xor <4 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %c8 = add <8 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %d8 = sub <8 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %e8 = mul <8 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %f8 = ashr <8 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %g8 = lshr <8 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %h8 = shl <8 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 
8 for: %i8 = and <8 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %j8 = or <8 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %k8 = xor <8 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %c16 = add <16 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %d16 = sub <16 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %e16 = mul <16 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %f16 = ashr <16 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %g16 = lshr <16 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %h16 = shl <16 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %i16 = and <16 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %j16 = or <16 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %k16 = xor <16 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi16' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d2 = sub <2 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e2 = mul <2 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f2 = ashr <2 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g2 = lshr <2 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h2 = shl <2 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c4 = add <4 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d4 = sub <4 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e4 = mul <4 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f4 = ashr <4 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g4 = lshr <4 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h4 = shl <4 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c8 = add <8 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d8 = sub <8 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e8 = mul <8 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f8 = ashr <8 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g8 = lshr <8 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h8 = shl <8 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 
for instruction: %i8 = and <8 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j8 = or <8 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k8 = xor <8 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c16 = add <16 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d16 = sub <16 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e16 = mul <16 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f16 = ashr <16 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %g16 = lshr <16 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %h16 = shl <16 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %c2 = add <2 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %d2 = sub <2 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %e2 = mul <2 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %f2 = ashr <2 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %g2 = lshr <2 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %h2 = shl <2 x i16> undef, undef +; 
CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %i2 = and <2 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %j2 = or <2 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %k2 = xor <2 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %c4 = add <4 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %d4 = sub <4 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %e4 = mul <4 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %f4 = ashr <4 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %g4 = lshr <4 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %h4 = shl <4 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %i4 = and <4 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %j4 = or <4 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %k4 = xor <4 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %c8 = add <8 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %d8 = sub <8 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %e8 = mul <8 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %f8 = ashr <8 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %g8 = lshr <8 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %h8 = shl <8 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %i8 = and <8 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %j8 = or <8 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %k8 = xor <8 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %c16 = add <16 x i16> undef, 
undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %d16 = sub <16 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %e16 = mul <16 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %f16 = ashr <16 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %g16 = lshr <16 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %h16 = shl <16 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %i16 = and <16 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %j16 = or <16 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %k16 = xor <16 x i16> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8R-LABEL: 'vi16' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f2 = ashr <2 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g2 = lshr <2 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h2 = shl <2 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %d4 = sub <4 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f4 = ashr <4 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g4 = lshr <4 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h4 = shl <4 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e8 = mul <8 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f8 = ashr <8 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g8 = lshr <8 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h8 = shl <8 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c16 = add <16 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %d16 = sub <16 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e16 = mul <16 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = ashr <16 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g16 = lshr <16 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h16 = shl <16 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'vi16' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i16> undef, 
undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %k8 = xor <8 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c16 = add <16 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d16 = sub <16 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e16 = mul <16 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = ashr <16 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g16 = lshr <16 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h16 = shl <16 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c2 = add <2 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d2 = sub <2 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %e2 = mul <2 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f2 = ashr <2 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g2 = lshr <2 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h2 = shl <2 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i2 = and <2 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %j2 = or <2 x i16> 
undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k2 = xor <2 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c4 = add <4 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d4 = sub <4 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %e4 = mul <4 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f4 = ashr <4 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g4 = lshr <4 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h4 = shl <4 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i4 = and <4 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %j4 = or <4 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k4 = xor <4 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c8 = add <8 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d8 = sub <8 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %e8 = mul <8 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f8 = ashr <8 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g8 = lshr <8 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h8 = shl <8 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i8 = and <8 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %j8 = or <8 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k8 = xor <8 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c16 = add <16 x i16> undef, 
undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %d16 = sub <16 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %e16 = mul <16 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %f16 = ashr <16 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %g16 = lshr <16 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %h16 = shl <16 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %i16 = and <16 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %j16 = or <16 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %k16 = xor <16 x i16> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c2 = add <2 x i16> undef, undef %d2 = sub <2 x i16> undef, undef @@ -1003,277 +864,238 @@ define void @vi16() { define void @vi32() { ; CHECK-MVE1-LABEL: 'vi32' -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = and <2 
x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j2 = or <2 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k2 = xor <2 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c4 = add <4 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d4 = sub <4 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e4 = mul <4 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f4 = ashr <4 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g4 = lshr <4 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h4 = shl <4 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c8 = add <8 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d8 = sub <8 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e8 = mul <8 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f8 = ashr <8 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g8 = lshr <8 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h8 = shl <8 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i8 = and <8 
x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j8 = or <8 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k8 = xor <8 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c16 = add <16 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d16 = sub <16 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e16 = mul <16 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f16 = ashr <16 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %g16 = lshr <16 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %h16 = shl <16 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %c2 = add <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %d2 = sub <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %e2 = mul <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %f2 = ashr <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %g2 = lshr <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 10 for: %h2 = shl <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %i2 = and <2 x i32> undef, 
undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %j2 = or <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %k2 = xor <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %c4 = add <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %d4 = sub <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %e4 = mul <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %f4 = ashr <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %g4 = lshr <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %h4 = shl <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %i4 = and <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %j4 = or <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %k4 = xor <4 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %c8 = add <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %d8 = sub <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %e8 = mul <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %f8 = ashr <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %g8 = lshr <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 
SizeLat:8 for: %h8 = shl <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %i8 = and <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %j8 = or <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %k8 = xor <8 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:16 SizeLat:16 for: %c16 = add <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:16 SizeLat:16 for: %d16 = sub <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:16 SizeLat:16 for: %e16 = mul <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:16 SizeLat:16 for: %f16 = ashr <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:16 SizeLat:16 for: %g16 = lshr <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:16 SizeLat:16 for: %h16 = shl <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:16 SizeLat:16 for: %i16 = and <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:16 SizeLat:16 for: %j16 = or <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:16 SizeLat:16 for: %k16 = xor <16 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE2-LABEL: 'vi32' -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i32> undef, undef -; 
CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c4 = add <4 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d4 = sub <4 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e4 = mul <4 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f4 = ashr <4 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g4 = lshr <4 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h4 = shl <4 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c8 = add <8 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d8 = sub <8 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e8 = mul <8 x i32> undef, undef 
-; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f8 = ashr <8 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g8 = lshr <8 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h8 = shl <8 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c16 = add <16 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d16 = sub <16 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e16 = mul <16 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f16 = ashr <16 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g16 = lshr <16 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h16 = shl <16 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %c2 = add <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %d2 = sub <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 
for: %e2 = mul <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %f2 = ashr <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %g2 = lshr <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 10 for: %h2 = shl <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %i2 = and <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %j2 = or <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %k2 = xor <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %c4 = add <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %d4 = sub <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e4 = mul <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %f4 = ashr <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %g4 = lshr <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %h4 = shl <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %i4 = and <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %j4 = or <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %k4 = xor <4 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %c8 = add <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %d8 = sub <8 x i32> undef, undef +; 
CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %e8 = mul <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %f8 = ashr <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %g8 = lshr <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %h8 = shl <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %i8 = and <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %j8 = or <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %k8 = xor <8 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %c16 = add <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %d16 = sub <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %e16 = mul <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %f16 = ashr <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %g16 = lshr <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %h16 = shl <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %i16 = and <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %j16 = or <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %k16 = xor <16 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:0 
CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE4-LABEL: 'vi32' -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %k4 = xor <4 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c8 = add <8 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d8 = sub <8 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e8 = mul <8 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f8 = ashr <8 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g8 = lshr <8 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h8 = shl <8 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = and <8 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j8 = or <8 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k8 = xor <8 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c16 = add <16 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d16 = sub <16 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e16 = mul <16 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = ashr <16 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g16 = lshr <16 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h16 = shl <16 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i32> undef, undef -; 
CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i32> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %c2 = add <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %d2 = sub <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %e2 = mul <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %f2 = ashr <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %g2 = lshr <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 10 for: %h2 = shl <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %i2 = and <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %j2 = or <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %k2 = xor <2 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %c4 = add <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %d4 = sub <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %e4 = mul <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %f4 = ashr <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %g4 = lshr <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %h4 = shl <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %i4 = and <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %j4 = or <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %k4 = xor <4 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %c8 = add <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %d8 = sub <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: 
%e8 = mul <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %f8 = ashr <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %g8 = lshr <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %h8 = shl <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %i8 = and <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %j8 = or <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %k8 = xor <8 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 4 for: %c16 = add <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 4 for: %d16 = sub <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 4 for: %e16 = mul <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 4 for: %f16 = ashr <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 4 for: %g16 = lshr <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 4 for: %h16 = shl <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 4 for: %i16 = and <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 4 for: %j16 = or <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 4 for: %k16 = xor <16 x i32> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi32' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d2 = sub <2 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e2 = mul <2 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f2 = ashr <2 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %g2 = lshr <2 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h2 = shl <2 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c4 = add <4 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d4 = sub <4 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e4 = mul <4 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f4 = ashr <4 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g4 = lshr <4 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h4 = shl <4 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c8 = add <8 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d8 = sub <8 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e8 = mul <8 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f8 = 
ashr <8 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g8 = lshr <8 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h8 = shl <8 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i8 = and <8 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j8 = or <8 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k8 = xor <8 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c16 = add <16 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d16 = sub <16 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e16 = mul <16 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f16 = ashr <16 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %g16 = lshr <16 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %h16 = shl <16 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %c2 = add <2 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %d2 = sub <2 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: 
%e2 = mul <2 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %f2 = ashr <2 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %g2 = lshr <2 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %h2 = shl <2 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %i2 = and <2 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %j2 = or <2 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 2 for: %k2 = xor <2 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %c4 = add <4 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %d4 = sub <4 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %e4 = mul <4 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %f4 = ashr <4 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %g4 = lshr <4 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %h4 = shl <4 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %i4 = and <4 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %j4 = or <4 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %k4 = xor <4 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %c8 = add <8 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %d8 = sub <8 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %e8 = mul <8 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %f8 = ashr <8 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %g8 = lshr <8 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %h8 = shl <8 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 
8 for: %i8 = and <8 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %j8 = or <8 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %k8 = xor <8 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %c16 = add <16 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %d16 = sub <16 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %e16 = mul <16 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %f16 = ashr <16 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %g16 = lshr <16 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %h16 = shl <16 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %i16 = and <16 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %j16 = or <16 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %k16 = xor <16 x i32> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi32' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d2 = sub <2 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e2 = mul <2 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f2 = ashr <2 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g2 = lshr <2 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h2 = shl <2 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c4 = add <4 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d4 = sub <4 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e4 = mul <4 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f4 = ashr <4 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g4 = lshr <4 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h4 = shl <4 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c8 = add <8 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d8 = sub <8 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e8 = mul <8 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f8 = ashr <8 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g8 = lshr <8 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h8 = shl <8 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 
for instruction: %i8 = and <8 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j8 = or <8 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k8 = xor <8 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c16 = add <16 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d16 = sub <16 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e16 = mul <16 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f16 = ashr <16 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %g16 = lshr <16 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %h16 = shl <16 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %c2 = add <2 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %d2 = sub <2 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %e2 = mul <2 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %f2 = ashr <2 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %g2 = lshr <2 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %h2 = shl <2 x i32> undef, undef +; 
CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %i2 = and <2 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %j2 = or <2 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 2 for: %k2 = xor <2 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %c4 = add <4 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %d4 = sub <4 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %e4 = mul <4 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %f4 = ashr <4 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %g4 = lshr <4 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %h4 = shl <4 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %i4 = and <4 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %j4 = or <4 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %k4 = xor <4 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %c8 = add <8 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %d8 = sub <8 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %e8 = mul <8 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %f8 = ashr <8 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %g8 = lshr <8 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %h8 = shl <8 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %i8 = and <8 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %j8 = or <8 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %k8 = xor <8 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %c16 = add <16 x i32> undef, 
undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %d16 = sub <16 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %e16 = mul <16 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %f16 = ashr <16 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %g16 = lshr <16 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %h16 = shl <16 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %i16 = and <16 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %j16 = or <16 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %k16 = xor <16 x i32> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8R-LABEL: 'vi32' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f2 = ashr <2 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g2 = lshr <2 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h2 = shl <2 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %d4 = sub <4 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e4 = mul <4 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f4 = ashr <4 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g4 = lshr <4 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h4 = shl <4 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c8 = add <8 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d8 = sub <8 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e8 = mul <8 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f8 = ashr <8 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g8 = lshr <8 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h8 = shl <8 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = and <8 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j8 = or <8 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k8 = xor <8 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c16 = add <16 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %d16 = sub <16 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e16 = mul <16 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f16 = ashr <16 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g16 = lshr <16 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h16 = shl <16 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'vi32' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i32> undef, 
undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c8 = add <8 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d8 = sub <8 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e8 = mul <8 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f8 = ashr <8 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g8 = lshr <8 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h8 = shl <8 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = and <8 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j8 = or <8 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %k8 = xor <8 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c16 = add <16 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d16 = sub <16 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e16 = mul <16 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = ashr <16 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g16 = lshr <16 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h16 = shl <16 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c2 = add <2 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d2 = sub <2 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %e2 = mul <2 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f2 = ashr <2 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g2 = lshr <2 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h2 = shl <2 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i2 = and <2 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %j2 = or <2 x i32> 
undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k2 = xor <2 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c4 = add <4 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d4 = sub <4 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %e4 = mul <4 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f4 = ashr <4 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g4 = lshr <4 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h4 = shl <4 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i4 = and <4 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %j4 = or <4 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k4 = xor <4 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c8 = add <8 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %d8 = sub <8 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %e8 = mul <8 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %f8 = ashr <8 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %g8 = lshr <8 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %h8 = shl <8 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %i8 = and <8 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %j8 = or <8 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found 
costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %k8 = xor <8 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %c16 = add <16 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %d16 = sub <16 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %e16 = mul <16 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %f16 = ashr <16 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %g16 = lshr <16 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %h16 = shl <16 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %i16 = and <16 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %j16 = or <16 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %k16 = xor <16 x i32> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c2 = add <2 x i32> undef, undef %d2 = sub <2 x i32> undef, undef @@ -1316,277 +1138,238 @@ define void @vi32() { define void @vi64() { ; CHECK-MVE1-LABEL: 'vi64' -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c2 = add <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d2 = sub <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e2 = mul <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = ashr <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %g2 = lshr <2 x i64> undef, undef -; 
CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %h2 = shl <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = and <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j2 = or <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k2 = xor <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %c4 = add <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %d4 = sub <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = mul <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f4 = ashr <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %g4 = lshr <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %h4 = shl <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i4 = and <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j4 = or <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k4 = xor <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %c8 = add <8 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %d8 = sub <8 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e8 = mul <8 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f8 = ashr <8 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %g8 = lshr <8 x i64> 
undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %h8 = shl <8 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i8 = and <8 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j8 = or <8 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k8 = xor <8 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %c16 = add <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %d16 = sub <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e16 = mul <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %f16 = ashr <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %g16 = lshr <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %h16 = shl <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %i16 = and <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %j16 = or <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %k16 = xor <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE1-NEXT: Cost Model: Found costs of 20 for: %c2 = add <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 20 for: %d2 = sub <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 20 for: %e2 = mul <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 20 for: %f2 = ashr <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 20 for: %g2 = lshr 
<2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 20 for: %h2 = shl <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %i2 = and <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %j2 = or <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %k2 = xor <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 40 for: %c4 = add <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 40 for: %d4 = sub <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 40 for: %e4 = mul <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 40 for: %f4 = ashr <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 40 for: %g4 = lshr <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 40 for: %h4 = shl <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %i4 = and <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %j4 = or <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %k4 = xor <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 80 for: %c8 = add <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 80 for: %d8 = sub <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 80 for: %e8 = mul <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 80 for: %f8 = ashr <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 80 for: %g8 = lshr <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 80 for: %h8 = shl <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:16 SizeLat:16 for: %i8 = and <8 x i64> 
undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:16 SizeLat:16 for: %j8 = or <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:16 SizeLat:16 for: %k8 = xor <8 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 160 for: %c16 = add <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 160 for: %d16 = sub <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 160 for: %e16 = mul <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 160 for: %f16 = ashr <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 160 for: %g16 = lshr <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of 160 for: %h16 = shl <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:32 SizeLat:32 for: %i16 = and <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:32 SizeLat:32 for: %j16 = or <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:32 SizeLat:32 for: %k16 = xor <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE2-LABEL: 'vi64' -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c2 = add <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d2 = sub <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e2 = mul <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = ashr <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %g2 = lshr <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %h2 = shl <2 x i64> undef, undef -; CHECK-MVE2-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %c4 = add <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %d4 = sub <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = mul <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f4 = ashr <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %g4 = lshr <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %h4 = shl <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %c8 = add <8 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %d8 = sub <8 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e8 = mul <8 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f8 = ashr <8 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %g8 = lshr <8 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %h8 = shl <8 x i64> undef, undef -; 
CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i8 = and <8 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j8 = or <8 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k8 = xor <8 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %c16 = add <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %d16 = sub <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e16 = mul <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %f16 = ashr <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %g16 = lshr <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %h16 = shl <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE2-NEXT: Cost Model: Found costs of 20 for: %c2 = add <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 20 for: %d2 = sub <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 20 for: %e2 = mul <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 20 for: %f2 = ashr <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 20 for: %g2 = lshr <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 20 for: %h2 = shl <2 x i64> undef, undef +; CHECK-MVE2-NEXT: 
Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %i2 = and <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %j2 = or <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %k2 = xor <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 40 for: %c4 = add <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 40 for: %d4 = sub <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 40 for: %e4 = mul <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 40 for: %f4 = ashr <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 40 for: %g4 = lshr <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 40 for: %h4 = shl <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %i4 = and <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %j4 = or <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %k4 = xor <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 80 for: %c8 = add <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 80 for: %d8 = sub <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 80 for: %e8 = mul <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 80 for: %f8 = ashr <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 80 for: %g8 = lshr <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 80 for: %h8 = shl <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %i8 = and <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %j8 = or <8 x i64> undef, undef +; 
CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %k8 = xor <8 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 160 for: %c16 = add <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 160 for: %d16 = sub <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 160 for: %e16 = mul <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 160 for: %f16 = ashr <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 160 for: %g16 = lshr <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of 160 for: %h16 = shl <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %i16 = and <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %j16 = or <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %k16 = xor <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE4-LABEL: 'vi64' -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c2 = add <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d2 = sub <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e2 = mul <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = ashr <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %g2 = lshr <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %h2 = shl <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %j2 = or <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %c4 = add <4 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %d4 = sub <4 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = mul <4 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f4 = ashr <4 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %g4 = lshr <4 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %h4 = shl <4 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %c8 = add <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %d8 = sub <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e8 = mul <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f8 = ashr <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %g8 = lshr <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %h8 = shl <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found 
an estimated cost of 4 for instruction: %j8 = or <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %c16 = add <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %d16 = sub <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e16 = mul <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %f16 = ashr <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %g16 = lshr <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %h16 = shl <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE4-NEXT: Cost Model: Found costs of 20 for: %c2 = add <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 20 for: %d2 = sub <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 20 for: %e2 = mul <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 20 for: %f2 = ashr <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 20 for: %g2 = lshr <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 20 for: %h2 = shl <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %i2 = and <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %j2 = or <2 x i64> undef, 
undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 1 for: %k2 = xor <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 40 for: %c4 = add <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 40 for: %d4 = sub <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 40 for: %e4 = mul <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 40 for: %f4 = ashr <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 40 for: %g4 = lshr <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 40 for: %h4 = shl <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %i4 = and <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %j4 = or <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 2 for: %k4 = xor <4 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 80 for: %c8 = add <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 80 for: %d8 = sub <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 80 for: %e8 = mul <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 80 for: %f8 = ashr <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 80 for: %g8 = lshr <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 80 for: %h8 = shl <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 4 for: %i8 = and <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 4 for: %j8 = or <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 4 for: %k8 = xor <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 160 for: %c16 = add <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 160 for: %d16 = sub <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 160 for: %e16 = mul <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 160 
for: %f16 = ashr <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 160 for: %g16 = lshr <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 160 for: %h16 = shl <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 8 for: %i16 = and <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 8 for: %j16 = or <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of 8 for: %k16 = xor <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi64' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = add <2 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = sub <2 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = mul <2 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = ashr <2 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g2 = lshr <2 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h2 = shl <2 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = and <2 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j2 = or <2 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k2 = xor <2 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c4 = add <4 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d4 = sub <4 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e4 = mul <4 x i64> undef, undef 
-; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = ashr <4 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g4 = lshr <4 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h4 = shl <4 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i4 = and <4 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j4 = or <4 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k4 = xor <4 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c8 = add <8 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d8 = sub <8 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e8 = mul <8 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f8 = ashr <8 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %g8 = lshr <8 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %h8 = shl <8 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i8 = and <8 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j8 = or <8 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k8 = xor <8 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %c16 = add <16 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %d16 = sub <16 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: 
Cost Model: Found an estimated cost of 32 for instruction: %e16 = mul <16 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %f16 = ashr <16 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %g16 = lshr <16 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %h16 = shl <16 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %i16 = and <16 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %j16 = or <16 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %k16 = xor <16 x i64> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %c2 = add <2 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %d2 = sub <2 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %e2 = mul <2 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %f2 = ashr <2 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %g2 = lshr <2 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %h2 = shl <2 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %i2 = and <2 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %j2 = or <2 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 4 for: %k2 = xor <2 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %c4 = add <4 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %d4 = sub <4 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %e4 = mul <4 x i64> undef, undef +; 
CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %f4 = ashr <4 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %g4 = lshr <4 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %h4 = shl <4 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %i4 = and <4 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %j4 = or <4 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 8 for: %k4 = xor <4 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %c8 = add <8 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %d8 = sub <8 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %e8 = mul <8 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %f8 = ashr <8 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %g8 = lshr <8 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %h8 = shl <8 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %i8 = and <8 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %j8 = or <8 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 16 for: %k8 = xor <8 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 32 for: %c16 = add <16 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 32 for: %d16 = sub <16 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 32 for: %e16 = mul <16 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 32 for: %f16 = ashr <16 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 32 for: %g16 = lshr <16 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 32 for: %h16 = shl <16 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 32 for: 
%i16 = and <16 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 32 for: %j16 = or <16 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 32 for: %k16 = xor <16 x i64> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi64' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = add <2 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = sub <2 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = mul <2 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = ashr <2 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g2 = lshr <2 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h2 = shl <2 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = and <2 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j2 = or <2 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k2 = xor <2 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c4 = add <4 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d4 = sub <4 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e4 = mul <4 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = ashr <4 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g4 = lshr <4 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 
for instruction: %h4 = shl <4 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i4 = and <4 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j4 = or <4 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k4 = xor <4 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c8 = add <8 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d8 = sub <8 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e8 = mul <8 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f8 = ashr <8 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %g8 = lshr <8 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %h8 = shl <8 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i8 = and <8 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j8 = or <8 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k8 = xor <8 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %c16 = add <16 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %d16 = sub <16 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %e16 = mul <16 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %f16 = ashr <16 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %g16 
= lshr <16 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %h16 = shl <16 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %i16 = and <16 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %j16 = or <16 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %k16 = xor <16 x i64> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %c2 = add <2 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %d2 = sub <2 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %e2 = mul <2 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %f2 = ashr <2 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %g2 = lshr <2 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %h2 = shl <2 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %i2 = and <2 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %j2 = or <2 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 4 for: %k2 = xor <2 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %c4 = add <4 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %d4 = sub <4 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %e4 = mul <4 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %f4 = ashr <4 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %g4 = lshr <4 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %h4 = shl <4 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: 
Found costs of 8 for: %i4 = and <4 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %j4 = or <4 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 8 for: %k4 = xor <4 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %c8 = add <8 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %d8 = sub <8 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %e8 = mul <8 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %f8 = ashr <8 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %g8 = lshr <8 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %h8 = shl <8 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %i8 = and <8 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %j8 = or <8 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 16 for: %k8 = xor <8 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 32 for: %c16 = add <16 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 32 for: %d16 = sub <16 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 32 for: %e16 = mul <16 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 32 for: %f16 = ashr <16 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 32 for: %g16 = lshr <16 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 32 for: %h16 = shl <16 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 32 for: %i16 = and <16 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 32 for: %j16 = or <16 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 32 for: %k16 = xor <16 x i64> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V8R-LABEL: 
'vi64' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e2 = mul <2 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f2 = ashr <2 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g2 = lshr <2 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %h2 = shl <2 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c4 = add <4 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d4 = sub <4 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e4 = mul <4 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f4 = ashr <4 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g4 = lshr <4 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h4 = shl <4 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i64> undef, undef -; 
CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c8 = add <8 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d8 = sub <8 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e8 = mul <8 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f8 = ashr <8 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g8 = lshr <8 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h8 = shl <8 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c16 = add <16 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d16 = sub <16 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e16 = mul <16 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f16 = ashr <16 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %g16 = lshr <16 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %h16 = shl <16 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i64> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i64> undef, 
undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'vi64' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c2 = add <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d2 = sub <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e2 = mul <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = ashr <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %g2 = lshr <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %h2 = shl <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %c4 = add <4 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %d4 = sub <4 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = mul <4 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f4 = ashr <4 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %g4 = lshr <4 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %h4 = shl <4 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i64> undef, undef -; 
CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %c8 = add <8 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %d8 = sub <8 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e8 = mul <8 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f8 = ashr <8 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %g8 = lshr <8 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %h8 = shl <8 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %c16 = add <16 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %d16 = sub <16 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e16 = mul <16 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %f16 = ashr <16 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %g16 = lshr <16 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %h16 = shl <16 x i64> undef, undef -; 
CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %c2 = add <2 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %d2 = sub <2 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %e2 = mul <2 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f2 = ashr <2 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g2 = lshr <2 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h2 = shl <2 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %i2 = and <2 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %j2 = or <2 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 1 for: %k2 = xor <2 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c4 = add <4 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %d4 = sub <4 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %e4 = mul <4 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %f4 = ashr <4 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %g4 = lshr <4 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 
CodeSize:1 Lat:1 SizeLat:1 for: %h4 = shl <4 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %i4 = and <4 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %j4 = or <4 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %k4 = xor <4 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %c8 = add <8 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %d8 = sub <8 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %e8 = mul <8 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %f8 = ashr <8 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %g8 = lshr <8 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %h8 = shl <8 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %i8 = and <8 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %j8 = or <8 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %k8 = xor <8 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %c16 = add <16 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %d16 = sub <16 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %e16 = mul <16 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %f16 = ashr <16 x i64> undef, undef +; CHECK-V8R-NEXT: 
Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %g16 = lshr <16 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %h16 = shl <16 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %i16 = and <16 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %j16 = or <16 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %k16 = xor <16 x i64> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c2 = add <2 x i64> undef, undef %d2 = sub <2 x i64> undef, undef diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll index 6c974af..673bf38 100644 --- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll @@ -1,1579 +1,2413 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d < %s | FileCheck %s --check-prefixes=CHECK,NO-ZFHMIN ; Check that we don't crash querying costs when vectors are not enabled. 
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 define void @fadd() { ; CHECK-LABEL: 'fadd' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fadd bfloat undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fadd float undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fadd double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fadd <1 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fadd <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fadd <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fadd <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fadd <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fadd <vscale x 1 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fadd <vscale x 2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fadd <1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> undef, undef -; CHECK-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fadd <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fadd <vscale x 1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fadd <vscale x 2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fadd <vscale x 4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fadd <vscale x 8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fadd <vscale x 16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fadd <1 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fadd <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fadd <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fadd <vscale x 1 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fadd <vscale x 2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fadd <vscale x 4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fadd <vscale x 8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x bfloat> @llvm.vp.fadd.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V2F16_VP = call <2 x bfloat> @llvm.vp.fadd.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x bfloat> @llvm.vp.fadd.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x bfloat> @llvm.vp.fadd.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x bfloat> @llvm.vp.fadd.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fadd.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fadd.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fadd.v2f64(<2 x 
double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> undef, 
<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fadd float poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fadd double poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fadd <1 x float> poison, poison 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fadd <16 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fadd <vscale x 1 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fadd <vscale x 2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fadd <vscale x 4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fadd <vscale x 8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fadd <vscale x 16 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fadd <1 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fadd <2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <4 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fadd <8 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fadd <vscale x 1 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fadd <vscale x 2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fadd <vscale x 4 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: 
%NXV8F64 = fadd <vscale x 8 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fadd.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fadd.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x 
i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %BF16 = fadd bfloat 
undef, undef - %F32 = fadd float undef, undef - %F64 = fadd double undef, undef - - %V1BF16 = fadd <1 x bfloat> undef, undef - %V2BF16 = fadd <2 x bfloat> undef, undef - %V4BF16 = fadd <4 x bfloat> undef, undef - %V8BF16 = fadd <8 x bfloat> undef, undef - %V16BF16 = fadd <16 x bfloat> undef, undef - - %NXV1BF16 = fadd <vscale x 1 x bfloat> undef, undef - %NXV2BF16 = fadd <vscale x 2 x bfloat> undef, undef - %NXV4BF16 = fadd <vscale x 4 x bfloat> undef, undef - %NXV8BF16 = fadd <vscale x 8 x bfloat> undef, undef - %NXV16BF16 = fadd <vscale x 16 x bfloat> undef, undef - - %V1F32 = fadd <1 x float> undef, undef - %V2F32 = fadd <2 x float> undef, undef - %V4F32 = fadd <4 x float> undef, undef - %V8F32 = fadd <8 x float> undef, undef - %V16F32 = fadd <16 x float> undef, undef - - %NXV1F32 = fadd <vscale x 1 x float> undef, undef - %NXV2F32 = fadd <vscale x 2 x float> undef, undef - %NXV4F32 = fadd <vscale x 4 x float> undef, undef - %NXV8F32 = fadd <vscale x 8 x float> undef, undef - %NXV16F32 = fadd <vscale x 16 x float> undef, undef - - %V1F64 = fadd <1 x double> undef, undef - %V2F64 = fadd <2 x double> undef, undef - %V4F64 = fadd <4 x double> undef, undef - %V8F64 = fadd <8 x double> undef, undef - - %NXV1F64 = fadd <vscale x 1 x double> undef, undef - %NXV2F64 = fadd <vscale x 2 x double> undef, undef - %NXV4F64 = fadd <vscale x 4 x double> undef, undef - %NXV8F64 = fadd <vscale x 8 x double> undef, undef - - %V1F16_VP = call <1 x bfloat> @llvm.vp.fadd.v1f16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef) - %V2F16_VP = call <2 x bfloat> @llvm.vp.fadd.v2f16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef) - %V4F16_VP = call <4 x bfloat> @llvm.vp.fadd.v4f16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef) - %V8F16_VP = call <8 x bfloat> @llvm.vp.fadd.v8f16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef) - %V16F16_VP = call <16 x bfloat> @llvm.vp.fadd.v16f16(<16 x bfloat> undef, <16 x bfloat> 
undef, <16 x i1> undef, i32 undef) - - %V1F32_VP = call <1 x float> @llvm.vp.fadd.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef) - %V2F32_VP = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef) - %V4F32_VP = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) - %V8F32_VP = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) - %V16F32_VP = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef) - - %V1F64_VP = call <1 x double> @llvm.vp.fadd.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef) - %V2F64_VP = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) - %V4F64_VP = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) - %V8F64_VP = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef) - - %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1f16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2f16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4f16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8f16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16f16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) - - %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> undef, 
<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) - - %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) + %F32 = fadd float poison, poison + %F64 = fadd double poison, poison + + %V1F32 = fadd <1 x float> poison, poison + %V2F32 = fadd <2 x float> poison, poison + %V4F32 = fadd <4 x float> poison, poison + %V8F32 = fadd <8 x float> poison, poison + %V16F32 = fadd <16 x float> poison, poison + + %NXV1F32 = fadd <vscale x 1 x float> poison, poison + %NXV2F32 = fadd <vscale x 2 x float> poison, poison + %NXV4F32 = fadd <vscale x 4 x float> poison, poison + %NXV8F32 = fadd <vscale x 8 x float> poison, poison + %NXV16F32 = fadd <vscale x 16 x float> poison, poison + + %V1F64 = fadd <1 x double> poison, poison + %V2F64 = fadd <2 x double> poison, poison + %V4F64 = 
fadd <4 x double> poison, poison + %V8F64 = fadd <8 x double> poison, poison + + %NXV1F64 = fadd <vscale x 1 x double> poison, poison + %NXV2F64 = fadd <vscale x 2 x double> poison, poison + %NXV4F64 = fadd <vscale x 4 x double> poison, poison + %NXV8F64 = fadd <vscale x 8 x double> poison, poison + + %V1F32_VP = call <1 x float> @llvm.vp.fadd(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) + %V2F32_VP = call <2 x float> @llvm.vp.fadd(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison) + %V4F32_VP = call <4 x float> @llvm.vp.fadd(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) + %V8F32_VP = call <8 x float> @llvm.vp.fadd(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) + %V16F32_VP = call <16 x float> @llvm.vp.fadd(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison) + + %V1F64_VP = call <1 x double> @llvm.vp.fadd(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) + %V2F64_VP = call <2 x double> @llvm.vp.fadd(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison) + %V4F64_VP = call <4 x double> @llvm.vp.fadd(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) + %V8F64_VP = call <8 x double> @llvm.vp.fadd(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) + + %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fadd(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fadd(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fadd(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fadd(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16F32_VP = call <vscale x 16 x float> 
@llvm.vp.fadd(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) + + %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fadd(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fadd(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fadd(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fadd(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) + + ret void +} + +define void @fadd_bf16() { +; ZVFH-LABEL: 'fadd_bf16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fadd bfloat poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fadd <1 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fadd <2 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fadd <4 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fadd <8 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fadd <16 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison 
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x 
bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fadd_bf16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fadd bfloat poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fadd <1 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fadd <2 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fadd <4 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fadd <8 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fadd <16 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret void +; +; NO-ZFHMIN-LABEL: 'fadd_bf16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fadd bfloat poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fadd <1 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fadd <2 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fadd <4 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fadd <8 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fadd <16 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost 
Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %BF16 = fadd bfloat poison, poison + + %V1BF16 = fadd <1 x bfloat> poison, poison + %V2BF16 = fadd <2 x bfloat> poison, poison + %V4BF16 = fadd <4 x bfloat> poison, poison + %V8BF16 = fadd <8 x bfloat> poison, poison + %V16BF16 = fadd <16 x bfloat> poison, poison + + %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison + %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison + %NXV4BF16 = fadd 
<vscale x 4 x bfloat> poison, poison + %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison + %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison + + %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) + %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) + %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) + %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) + %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) + + %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) ret void } define void @fadd_f16() { ; ZVFH-LABEL: 'fadd_f16' -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fadd <2 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = 
fadd <4 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fadd <32 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fadd <vscale x 1 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fadd <vscale x 2 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fadd <vscale x 4 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fadd <vscale x 8 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fadd <vscale x 16 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd <vscale x 32 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> 
undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fadd <2 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <4 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fadd <32 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 
= fadd <vscale x 1 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fadd <vscale x 2 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fadd <vscale x 4 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fadd <vscale x 8 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fadd <vscale x 16 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd <vscale x 32 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> poison, <vscale 
x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) ; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; ZVFHMIN-LABEL: 'fadd_f16' -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fadd <1 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fadd <2 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fadd <4 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fadd <8 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fadd <16 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fadd <32 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fadd <vscale x 1 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fadd <vscale x 2 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fadd <vscale x 4 x 
half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fadd <vscale x 8 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fadd <vscale x 16 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd <vscale x 32 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> 
undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fadd <1 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fadd <2 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fadd <4 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fadd <8 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fadd <16 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fadd <32 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fadd <vscale x 1 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fadd <vscale x 2 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fadd <vscale x 4 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fadd <vscale x 8 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fadd <vscale x 16 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: 
%NXV32F16 = fadd <vscale x 32 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for 
instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %F16 = fadd half undef, undef - - %V1F16 = fadd <1 x half> undef, undef - %V2F16 = fadd <2 x half> undef, undef - %V4F16 = fadd <4 x half> undef, undef - %V8F16 = fadd <8 x half> undef, undef - %V16F16 = fadd <16 x half> undef, undef - %V32F16 = fadd <32 x half> undef, undef - - %NXV1F16 = fadd <vscale x 1 x half> undef, undef - %NXV2F16 = fadd <vscale x 2 x half> undef, undef - %NXV4F16 = fadd <vscale x 4 x half> undef, undef - %NXV8F16 = fadd <vscale x 8 x half> undef, undef - %NXV16F16 = fadd <vscale x 16 x half> undef, undef - %NXV32F16 = fadd <vscale x 32 x half> undef, undef - - %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef) - %V2F16_VP = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef) - %V4F16_VP = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef) - %V8F16_VP = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef) - %V16F16_VP = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef) - - %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, 
<vscale x 8 x i1> undef, i32 undef) - %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef) +; NO-ZFHMIN-LABEL: 'fadd_f16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fadd <2 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fadd <4 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fadd <8 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fadd <16 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fadd <32 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = fadd <vscale x 1 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = fadd <vscale x 2 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = fadd <vscale x 4 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = fadd <vscale x 8 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = fadd <vscale x 16 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = fadd <vscale x 32 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP 
= call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %F16 = fadd half poison, poison + + %V1F16 = fadd <1 x half> poison, poison + %V2F16 = fadd <2 x half> poison, poison + %V4F16 = fadd 
<4 x half> poison, poison + %V8F16 = fadd <8 x half> poison, poison + %V16F16 = fadd <16 x half> poison, poison + %V32F16 = fadd <32 x half> poison, poison + + %NXV1F16 = fadd <vscale x 1 x half> poison, poison + %NXV2F16 = fadd <vscale x 2 x half> poison, poison + %NXV4F16 = fadd <vscale x 4 x half> poison, poison + %NXV8F16 = fadd <vscale x 8 x half> poison, poison + %NXV16F16 = fadd <vscale x 16 x half> poison, poison + %NXV32F16 = fadd <vscale x 32 x half> poison, poison + + %V1F16_VP = call <1 x half> @llvm.vp.fadd(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) + %V2F16_VP = call <2 x half> @llvm.vp.fadd(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) + %V4F16_VP = call <4 x half> @llvm.vp.fadd(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) + %V8F16_VP = call <8 x half> @llvm.vp.fadd(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) + %V16F16_VP = call <16 x half> @llvm.vp.fadd(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) + + %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) ret void } define void @fsub() { ; CHECK-LABEL: 'fsub' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %F32 = fsub float undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fsub <1 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fsub <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fsub <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fsub <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fsub <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fsub <vscale x 1 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fsub <vscale x 2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fsub <1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%NXV1F32 = fsub <vscale x 1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fsub <vscale x 2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fsub <vscale x 4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fsub <vscale x 8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fsub <vscale x 16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fsub <1 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fsub <vscale x 1 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fsub <vscale x 2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fsub <vscale x 4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fsub <vscale x 8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x bfloat> @llvm.vp.fsub.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x bfloat> @llvm.vp.fsub.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x bfloat> 
@llvm.vp.fsub.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x bfloat> @llvm.vp.fsub.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x bfloat> @llvm.vp.fsub.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fsub.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fsub.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) -; 
CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, 
<vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fsub.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fsub <1 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> poison, poison +; CHECK-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fsub <vscale x 1 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fsub <vscale x 2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fsub <vscale x 4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fsub <vscale x 8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fsub <vscale x 16 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fsub <1 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fsub <vscale x 1 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fsub <vscale x 2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fsub <vscale x 4 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fsub <vscale x 8 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fsub.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fsub.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 
poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fsub.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %F16 = fsub half undef, undef - %F32 = fsub float undef, undef - %F64 = fsub double undef, undef - - %V1BF16 = fsub <1 x bfloat> undef, undef - %V2BF16 = fsub <2 x bfloat> undef, undef - %V4BF16 = fsub <4 x bfloat> undef, undef - %V8BF16 = fsub <8 x bfloat> undef, 
undef - %V16BF16 = fsub <16 x bfloat> undef, undef - - %NXV1BF16 = fsub <vscale x 1 x bfloat> undef, undef - %NXV2BF16 = fsub <vscale x 2 x bfloat> undef, undef - %NXV4BF16 = fsub <vscale x 4 x bfloat> undef, undef - %NXV8BF16 = fsub <vscale x 8 x bfloat> undef, undef - %NXV16BF16 = fsub <vscale x 16 x bfloat> undef, undef - - %V1F32 = fsub <1 x float> undef, undef - %V2F32 = fsub <2 x float> undef, undef - %V4F32 = fsub <4 x float> undef, undef - %V8F32 = fsub <8 x float> undef, undef - %V16F32 = fsub <16 x float> undef, undef - - %NXV1F32 = fsub <vscale x 1 x float> undef, undef - %NXV2F32 = fsub <vscale x 2 x float> undef, undef - %NXV4F32 = fsub <vscale x 4 x float> undef, undef - %NXV8F32 = fsub <vscale x 8 x float> undef, undef - %NXV16F32 = fsub <vscale x 16 x float> undef, undef - - %V1F64 = fsub <1 x double> undef, undef - %V2F64 = fsub <2 x double> undef, undef - %V4F64 = fsub <4 x double> undef, undef - %V8F64 = fsub <8 x double> undef, undef - - %NXV1F64 = fsub <vscale x 1 x double> undef, undef - %NXV2F64 = fsub <vscale x 2 x double> undef, undef - %NXV4F64 = fsub <vscale x 4 x double> undef, undef - %NXV8F64 = fsub <vscale x 8 x double> undef, undef - - %V1F16_VP = call <1 x bfloat> @llvm.vp.fsub.v1f16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef) - %V2F16_VP = call <2 x bfloat> @llvm.vp.fsub.v2f16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef) - %V4F16_VP = call <4 x bfloat> @llvm.vp.fsub.v4f16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef) - %V8F16_VP = call <8 x bfloat> @llvm.vp.fsub.v8f16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef) - %V16F16_VP = call <16 x bfloat> @llvm.vp.fsub.v16f16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef) - - %V1F32_VP = call <1 x float> @llvm.vp.fsub.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef) - %V2F32_VP = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> undef, <2 x float> undef, <2 x 
i1> undef, i32 undef) - %V4F32_VP = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) - %V8F32_VP = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) - %V16F32_VP = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef) - - %V1F64_VP = call <1 x double> @llvm.vp.fsub.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef) - %V2F64_VP = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) - %V4F64_VP = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) - %V8F64_VP = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef) - - %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1f16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2f16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4f16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8f16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16f16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) - - %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F32_VP = call <vscale 
x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) - - %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fsub.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) + %F32 = fsub float poison, poison + %F64 = fsub double poison, poison + + %V1F32 = fsub <1 x float> poison, poison + %V2F32 = fsub <2 x float> poison, poison + %V4F32 = fsub <4 x float> poison, poison + %V8F32 = fsub <8 x float> poison, poison + %V16F32 = fsub <16 x float> poison, poison + + %NXV1F32 = fsub <vscale x 1 x float> poison, poison + %NXV2F32 = fsub <vscale x 2 x float> poison, poison + %NXV4F32 = fsub <vscale x 4 x float> poison, poison + %NXV8F32 = fsub <vscale x 8 x float> poison, poison + %NXV16F32 = fsub <vscale x 16 x float> poison, poison + + %V1F64 = fsub <1 x double> poison, poison + %V2F64 = fsub <2 x double> poison, poison + %V4F64 = fsub <4 x double> poison, poison + %V8F64 = fsub <8 x double> poison, poison + + %NXV1F64 = fsub <vscale x 1 x double> poison, poison + %NXV2F64 = fsub <vscale x 2 x double> poison, poison + %NXV4F64 = fsub <vscale x 4 x double> poison, poison + 
%NXV8F64 = fsub <vscale x 8 x double> poison, poison + + %V1F32_VP = call <1 x float> @llvm.vp.fsub(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) + %V2F32_VP = call <2 x float> @llvm.vp.fsub(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison) + %V4F32_VP = call <4 x float> @llvm.vp.fsub(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) + %V8F32_VP = call <8 x float> @llvm.vp.fsub(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) + %V16F32_VP = call <16 x float> @llvm.vp.fsub(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison) + + %V1F64_VP = call <1 x double> @llvm.vp.fsub(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) + %V2F64_VP = call <2 x double> @llvm.vp.fsub(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison) + %V4F64_VP = call <4 x double> @llvm.vp.fsub(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) + %V8F64_VP = call <8 x double> @llvm.vp.fsub(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) + + %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fsub(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fsub(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fsub(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fsub(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fsub(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) + + %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fsub(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x 
i1> poison, i32 poison) + %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fsub(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fsub(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fsub(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) + + ret void +} + +define void @fsub_bf16() { +; ZVFH-LABEL: 'fsub_bf16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fsub bfloat poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fsub <1 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fsub <2 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fsub <4 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fsub <8 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fsub <16 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fsub <32 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison 
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fsub <vscale x 32 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x 
bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fsub_bf16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fsub bfloat poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fsub <1 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fsub <2 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fsub <4 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fsub <8 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fsub <16 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fsub <32 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fsub <vscale x 32 x bfloat> 
poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found 
an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-ZFHMIN-LABEL: 'fsub_bf16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fsub bfloat poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fsub <1 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fsub <2 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fsub <4 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fsub <8 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fsub <16 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = fsub <32 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = fsub <vscale x 32 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub.v1bf16(<1 x 
bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) 
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %BF16 = fsub bfloat poison, poison + + %V1BF16 = fsub <1 x bfloat> poison, poison + %V2BF16 = fsub <2 x bfloat> poison, poison + %V4BF16 = fsub <4 x bfloat> poison, poison + %V8BF16 = fsub <8 x bfloat> poison, poison + %V16BF16 = fsub <16 x bfloat> poison, poison + %V32BF16 = fsub <32 x bfloat> poison, poison + + %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison + %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison + %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison + %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison + %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison + %NXV32BF16 = fsub <vscale x 32 x bfloat> poison, poison + + %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) + %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) + %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) + %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) + %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) + + %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16BF16_VP = call <vscale x 
16 x bfloat> @llvm.vp.fsub(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) ret void } define void @fsub_f16() { ; ZVFH-LABEL: 'fsub_f16' -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fsub <2 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fsub <32 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fsub <vscale x 1 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fsub <vscale x 2 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fsub <vscale x 4 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fsub <vscale x 8 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fsub <vscale x 16 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub <vscale x 32 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x 
half> undef, <2 x half> undef, <2 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %V2F16 = fsub <2 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fsub <32 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fsub <vscale x 1 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fsub <vscale x 2 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fsub <vscale x 4 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fsub <vscale x 8 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fsub <vscale x 16 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub <vscale x 32 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 
poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) ; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; ZVFHMIN-LABEL: 'fsub_f16' -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fsub <1 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fsub <2 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fsub <4 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fsub <8 x half> 
undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fsub <16 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fsub <32 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fsub <vscale x 1 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fsub <vscale x 2 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fsub <vscale x 4 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fsub <vscale x 8 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fsub <vscale x 16 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub <vscale x 32 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fsub <1 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fsub <2 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fsub <4 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fsub <8 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fsub <16 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fsub <32 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fsub <vscale x 1 x half> poison, 
poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fsub <vscale x 2 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fsub <vscale x 4 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fsub <vscale x 8 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fsub <vscale x 16 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub <vscale x 32 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> poison, 
<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %F16 = fsub half undef, undef - - %V1F16 = fsub <1 x half> undef, undef - %V2F16 = fsub <2 x half> undef, undef - %V4F16 = fsub <4 x half> undef, undef - %V8F16 = fsub <8 x half> undef, undef - %V16F16 = fsub <16 x half> undef, undef - %V32F16 = fsub <32 x half> undef, undef - - %NXV1F16 = fsub <vscale x 1 x half> undef, undef - %NXV2F16 = fsub <vscale x 2 x half> undef, undef - %NXV4F16 = fsub <vscale x 4 x half> undef, undef - %NXV8F16 = fsub <vscale x 8 x half> undef, undef - %NXV16F16 = fsub <vscale x 16 x half> undef, undef - %NXV32F16 = fsub <vscale x 32 x half> undef, undef - - %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef) - %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef) - %V4F16_VP = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef) - %V8F16_VP = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef) - %V16F16_VP = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> 
undef, i32 undef) - - %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef) +; NO-ZFHMIN-LABEL: 'fsub_f16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fsub <2 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fsub <4 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fsub <8 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fsub <16 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fsub <32 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = fsub <vscale x 1 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = fsub <vscale x 2 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = fsub <vscale x 4 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost 
Model: Invalid cost for instruction: %NXV8F16 = fsub <vscale x 8 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = fsub <vscale x 16 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = fsub <vscale x 32 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: 
%NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %F16 = fsub half poison, poison + + %V1F16 = fsub <1 x half> poison, poison + %V2F16 = fsub <2 x half> poison, poison + %V4F16 = fsub <4 x half> poison, poison + %V8F16 = fsub <8 x half> poison, poison + %V16F16 = fsub <16 x half> poison, poison + %V32F16 = fsub <32 x half> poison, poison + + %NXV1F16 = fsub <vscale x 1 x half> poison, poison + %NXV2F16 = fsub <vscale x 2 x half> poison, poison + %NXV4F16 = fsub <vscale x 4 x half> poison, poison + %NXV8F16 = fsub <vscale x 8 x half> poison, poison + %NXV16F16 = fsub <vscale x 16 x half> poison, poison + %NXV32F16 = fsub <vscale x 32 x half> poison, poison + + %V1F16_VP = call <1 x half> @llvm.vp.fsub(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) + %V2F16_VP = call <2 x half> @llvm.vp.fsub(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) + %V4F16_VP = call <4 x half> @llvm.vp.fsub(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) + %V8F16_VP = call <8 x half> @llvm.vp.fsub(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) + %V16F16_VP = call <16 x half> @llvm.vp.fsub(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) + + %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F16_VP = call <vscale x 4 x half> 
@llvm.vp.fsub(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) ret void } define void @fmul() { ; CHECK-LABEL: 'fmul' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fmul bfloat undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fmul float undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fmul double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fmul <1 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fmul <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fmul <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fmul <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fmul <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fmul <vscale x 1 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fmul <vscale x 2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> undef, undef -; CHECK-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fmul <1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fmul <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fmul <vscale x 1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fmul <vscale x 2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fmul <vscale x 4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fmul <vscale x 8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fmul <vscale x 16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fmul <1 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fmul <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fmul <vscale x 1 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fmul <vscale x 2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fmul <vscale x 4 x double> undef, undef -; 
CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fmul <vscale x 8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x bfloat> @llvm.vp.fmul.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x bfloat> @llvm.vp.fmul.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x bfloat> @llvm.vp.fmul.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x bfloat> @llvm.vp.fmul.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x bfloat> @llvm.vp.fmul.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fmul.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> undef, <16 x float> undef, 
<16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fmul.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fmul.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fmul.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fmul.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fmul.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fmul.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fmul.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fmul.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> 
undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fmul float poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fmul double poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fmul <1 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul <8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fmul <16 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fmul <vscale x 1 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fmul <vscale x 2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fmul <vscale x 4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fmul <vscale x 8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fmul <vscale x 16 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fmul <1 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul <2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fmul <8 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fmul <vscale x 1 x 
double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fmul <vscale x 2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fmul <vscale x 4 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fmul <vscale x 8 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fmul.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fmul.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 
8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fmul.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fmul.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fmul.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fmul.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fmul.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fmul.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 
poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fmul.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %BF16 = fmul bfloat undef, undef - %F32 = fmul float undef, undef - %F64 = fmul double undef, undef - - %V1BF16 = fmul <1 x bfloat> undef, undef - %V2BF16 = fmul <2 x bfloat> undef, undef - %V4BF16 = fmul <4 x bfloat> undef, undef - %V8BF16 = fmul <8 x bfloat> undef, undef - %V16BF16 = fmul <16 x bfloat> undef, undef - - %NXV1BF16 = fmul <vscale x 1 x bfloat> undef, undef - %NXV2BF16 = fmul <vscale x 2 x bfloat> undef, undef - %NXV4BF16 = fmul <vscale x 4 x bfloat> undef, undef - %NXV8BF16 = fmul <vscale x 8 x bfloat> undef, undef - %NXV16BF16 = fmul <vscale x 16 x bfloat> undef, undef - - %V1F32 = fmul <1 x float> undef, undef - %V2F32 = fmul <2 x float> undef, undef - %V4F32 = fmul <4 x float> undef, undef - %V8F32 = fmul <8 x float> undef, undef - %V16F32 = fmul <16 x float> undef, undef - - %NXV1F32 = fmul <vscale x 1 x float> undef, undef - %NXV2F32 = fmul <vscale x 2 x float> undef, undef - %NXV4F32 = fmul <vscale x 4 x float> undef, undef - %NXV8F32 = fmul <vscale x 8 x float> undef, undef - %NXV16F32 = fmul <vscale x 16 x float> undef, undef - - %V1F64 = fmul <1 x double> undef, undef - %V2F64 = fmul <2 x double> undef, undef - %V4F64 = fmul <4 x double> undef, undef - %V8F64 = fmul <8 x double> undef, undef - - %NXV1F64 = fmul <vscale x 1 x double> undef, undef - %NXV2F64 = fmul <vscale x 2 x double> undef, undef - %NXV4F64 = fmul <vscale x 4 x double> undef, undef - %NXV8F64 = fmul <vscale x 8 x double> undef, undef - - %V1F16_VP = call <1 x bfloat> @llvm.vp.fmul.v1f16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef) - %V2F16_VP = call <2 x bfloat> @llvm.vp.fmul.v2f16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> 
undef, i32 undef) - %V4F16_VP = call <4 x bfloat> @llvm.vp.fmul.v4f16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef) - %V8F16_VP = call <8 x bfloat> @llvm.vp.fmul.v8f16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef) - %V16F16_VP = call <16 x bfloat> @llvm.vp.fmul.v16f16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef) - - %V1F32_VP = call <1 x float> @llvm.vp.fmul.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef) - %V2F32_VP = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef) - %V4F32_VP = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) - %V8F32_VP = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) - %V16F32_VP = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef) - - %V1F64_VP = call <1 x double> @llvm.vp.fmul.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef) - %V2F64_VP = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) - %V4F64_VP = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) - %V8F64_VP = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef) - - %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1f16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2f16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4f16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8f16(<vscale 
x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16f16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) - - %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fmul.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fmul.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fmul.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fmul.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) - - %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fmul.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fmul.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fmul.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) + %F32 = fmul float poison, poison + %F64 = fmul double poison, poison + + %V1F32 = fmul <1 x float> poison, poison + %V2F32 = fmul <2 x float> poison, poison + %V4F32 = fmul <4 x float> poison, poison + %V8F32 = fmul <8 x float> poison, poison + %V16F32 = fmul <16 x float> poison, poison + + %NXV1F32 = fmul <vscale x 1 
x float> poison, poison + %NXV2F32 = fmul <vscale x 2 x float> poison, poison + %NXV4F32 = fmul <vscale x 4 x float> poison, poison + %NXV8F32 = fmul <vscale x 8 x float> poison, poison + %NXV16F32 = fmul <vscale x 16 x float> poison, poison + + %V1F64 = fmul <1 x double> poison, poison + %V2F64 = fmul <2 x double> poison, poison + %V4F64 = fmul <4 x double> poison, poison + %V8F64 = fmul <8 x double> poison, poison + + %NXV1F64 = fmul <vscale x 1 x double> poison, poison + %NXV2F64 = fmul <vscale x 2 x double> poison, poison + %NXV4F64 = fmul <vscale x 4 x double> poison, poison + %NXV8F64 = fmul <vscale x 8 x double> poison, poison + + %V1F32_VP = call <1 x float> @llvm.vp.fmul(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) + %V2F32_VP = call <2 x float> @llvm.vp.fmul(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison) + %V4F32_VP = call <4 x float> @llvm.vp.fmul(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) + %V8F32_VP = call <8 x float> @llvm.vp.fmul(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) + %V16F32_VP = call <16 x float> @llvm.vp.fmul(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison) + + %V1F64_VP = call <1 x double> @llvm.vp.fmul(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) + %V2F64_VP = call <2 x double> @llvm.vp.fmul(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison) + %V4F64_VP = call <4 x double> @llvm.vp.fmul(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) + %V8F64_VP = call <8 x double> @llvm.vp.fmul(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) + + %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fmul(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fmul(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) + 
%NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fmul(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fmul(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fmul(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) + + %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fmul(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fmul(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fmul(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fmul(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) + + ret void +} + +define void @fmul_bf16() { +; ZVFH-LABEL: 'fmul_bf16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fmul bfloat poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fmul <1 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fmul <2 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fmul <4 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fmul <8 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fmul <16 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fmul <32 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an 
estimated cost of 5 for instruction: %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fmul <vscale x 32 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fmul_bf16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fmul bfloat poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fmul <1 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fmul <2 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fmul <4 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fmul <8 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fmul <16 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fmul <32 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an 
estimated cost of 5 for instruction: %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fmul <vscale x 32 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2bf16(<vscale x 
2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-ZFHMIN-LABEL: 'fmul_bf16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fmul bfloat poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fmul <1 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fmul <2 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fmul <4 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fmul <8 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fmul <16 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = fmul <32 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison 
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = fmul <vscale x 32 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = 
call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %BF16 = fmul bfloat poison, poison + + %V1BF16 = fmul <1 x bfloat> poison, poison + %V2BF16 = fmul <2 x bfloat> poison, poison + %V4BF16 = fmul <4 x bfloat> poison, poison + %V8BF16 = fmul <8 x bfloat> poison, poison + %V16BF16 = fmul <16 x bfloat> poison, poison + %V32BF16 = fmul <32 x bfloat> poison, poison + + %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison + %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison + %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison + %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison + %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison + %NXV32BF16 = fmul <vscale x 32 x bfloat> poison, poison + + %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) + %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) + %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) + %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) + %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) + + %NXV1BF16_VP = call <vscale x 1 x bfloat> 
@llvm.vp.fmul(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) ret void } define void @fmul_f16() { ; ZVFH-LABEL: 'fmul_f16' -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fmul <2 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fmul <32 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fmul <vscale x 1 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fmul <vscale x 2 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fmul <vscale x 4 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %NXV8F16 = fmul <vscale x 8 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fmul <vscale x 16 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul <vscale x 32 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 
8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fmul <2 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fmul <32 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fmul <vscale x 1 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fmul <vscale x 2 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fmul <vscale x 4 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fmul <vscale x 8 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fmul <vscale x 16 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul <vscale x 32 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x 
half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) ; ZVFH-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret void ; ; ZVFHMIN-LABEL: 'fmul_f16' -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fmul <1 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fmul <2 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fmul <4 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fmul <8 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fmul <16 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fmul <32 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fmul <vscale x 1 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fmul <vscale x 2 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fmul <vscale x 4 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fmul <vscale x 8 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fmul <vscale x 16 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul <vscale x 32 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef) -; 
ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fmul <1 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: 
%V2F16 = fmul <2 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fmul <4 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fmul <8 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fmul <16 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fmul <32 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fmul <vscale x 1 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fmul <vscale x 2 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fmul <vscale x 4 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fmul <vscale x 8 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fmul <vscale x 16 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul <vscale x 32 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> poison, <8 x half> poison, 
<8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %F16 = fmul half undef, undef - - %V1F16 = fmul <1 x half> undef, undef - %V2F16 = fmul <2 x half> undef, undef - %V4F16 = fmul <4 x half> undef, undef - %V8F16 = fmul <8 x half> undef, undef - %V16F16 = fmul <16 x half> undef, undef - %V32F16 = fmul <32 x half> undef, undef - - %NXV1F16 = fmul <vscale x 1 x half> undef, undef - %NXV2F16 = fmul <vscale x 2 x half> undef, undef - %NXV4F16 = fmul <vscale x 4 x half> undef, undef - %NXV8F16 = fmul <vscale x 8 x half> undef, undef - %NXV16F16 = fmul <vscale x 16 x half> undef, 
undef - %NXV32F16 = fmul <vscale x 32 x half> undef, undef - - %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef) - %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef) - %V4F16_VP = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef) - %V8F16_VP = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef) - %V16F16_VP = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef) - - %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef) +; NO-ZFHMIN-LABEL: 'fmul_f16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fmul <2 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fmul <4 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fmul <8 x half> 
poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fmul <16 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fmul <32 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = fmul <vscale x 1 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = fmul <vscale x 2 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = fmul <vscale x 4 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = fmul <vscale x 8 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = fmul <vscale x 16 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = fmul <vscale x 32 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> 
@llvm.vp.fmul.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %F16 = fmul half poison, poison + + %V1F16 = fmul <1 x half> poison, poison + %V2F16 = fmul <2 x half> poison, poison + %V4F16 = fmul <4 x half> poison, poison + %V8F16 = fmul <8 x half> poison, poison + %V16F16 = fmul <16 x half> poison, poison + %V32F16 = fmul <32 x half> poison, poison + + %NXV1F16 = fmul <vscale x 1 x half> poison, poison + %NXV2F16 = fmul <vscale x 2 x half> poison, poison + %NXV4F16 = fmul <vscale x 4 x half> poison, poison + %NXV8F16 = fmul <vscale x 8 x half> poison, poison + %NXV16F16 = fmul <vscale x 16 x half> poison, poison + %NXV32F16 = fmul <vscale x 32 x half> poison, poison + + %V1F16_VP = call <1 x half> @llvm.vp.fmul(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) + %V2F16_VP = call <2 x half> @llvm.vp.fmul(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) + %V4F16_VP = call <4 x half> @llvm.vp.fmul(<4 x half> poison, <4 x half> poison, <4 
x i1> poison, i32 poison) + %V8F16_VP = call <8 x half> @llvm.vp.fmul(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) + %V16F16_VP = call <16 x half> @llvm.vp.fmul(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) + + %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) ret void } define void @fdiv() { ; CHECK-LABEL: 'fdiv' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fdiv bfloat undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fdiv float undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fdiv double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fdiv <1 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fdiv <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fdiv <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fdiv <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fdiv <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost 
of 5 for instruction: %NXV1BF16 = fdiv <vscale x 1 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fdiv <vscale x 2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fdiv <1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fdiv <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fdiv <vscale x 1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fdiv <vscale x 2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fdiv <vscale x 4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fdiv <vscale x 8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fdiv <vscale x 16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fdiv <1 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv <2 x double> undef, undef -; CHECK-NEXT: 
Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fdiv <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fdiv <vscale x 1 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fdiv <vscale x 2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fdiv <vscale x 4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fdiv <vscale x 8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x bfloat> @llvm.vp.fdiv.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x bfloat> @llvm.vp.fdiv.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fdiv.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> undef, 
<2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fdiv.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> 
undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fdiv.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fdiv.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fdiv.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fdiv.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP 
= call <vscale x 2 x double> @llvm.vp.fdiv.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fdiv.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fdiv.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fdiv float poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fdiv double poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fdiv <1 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fdiv <16 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fdiv <vscale x 1 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fdiv <vscale x 2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fdiv <vscale x 4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fdiv <vscale x 8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fdiv <vscale x 16 x 
float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fdiv <1 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv <2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fdiv <8 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fdiv <vscale x 1 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fdiv <vscale x 2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fdiv <vscale x 4 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fdiv <vscale x 8 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fdiv.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fdiv.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fdiv.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fdiv.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fdiv.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x 
double> @llvm.vp.fdiv.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fdiv.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fdiv.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fdiv.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %BF16 = fdiv bfloat undef, undef - %F32 = fdiv float undef, undef - %F64 = fdiv double undef, undef - - %V1BF16 = fdiv <1 x bfloat> undef, undef - %V2BF16 = fdiv <2 x bfloat> undef, undef - %V4BF16 = fdiv <4 x bfloat> undef, undef - %V8BF16 = fdiv <8 x bfloat> undef, undef - %V16BF16 = fdiv <16 x bfloat> undef, undef - - %NXV1BF16 = fdiv <vscale x 1 x bfloat> undef, undef - %NXV2BF16 = fdiv <vscale x 2 x bfloat> undef, undef - %NXV4BF16 = fdiv <vscale x 4 x bfloat> undef, undef - %NXV8BF16 = fdiv <vscale x 8 x bfloat> undef, undef - %NXV16BF16 = fdiv <vscale x 16 x bfloat> undef, undef - - %V1F32 = fdiv <1 x float> undef, undef - %V2F32 = fdiv <2 x float> undef, undef - %V4F32 = fdiv <4 x float> undef, undef - %V8F32 = fdiv <8 x float> undef, undef - %V16F32 = fdiv <16 x float> undef, undef - - %NXV1F32 = fdiv <vscale x 1 x float> undef, undef - %NXV2F32 = fdiv <vscale x 2 x float> undef, undef - %NXV4F32 = fdiv <vscale x 4 x float> undef, undef - %NXV8F32 = fdiv <vscale x 8 x float> undef, undef - %NXV16F32 = fdiv <vscale x 16 x float> undef, undef - - %V1F64 = fdiv 
<1 x double> undef, undef - %V2F64 = fdiv <2 x double> undef, undef - %V4F64 = fdiv <4 x double> undef, undef - %V8F64 = fdiv <8 x double> undef, undef - - %NXV1F64 = fdiv <vscale x 1 x double> undef, undef - %NXV2F64 = fdiv <vscale x 2 x double> undef, undef - %NXV4F64 = fdiv <vscale x 4 x double> undef, undef - %NXV8F64 = fdiv <vscale x 8 x double> undef, undef - - %V1F16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1f16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef) - %V2F16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2f16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef) - %V4F16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4f16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef) - %V8F16_VP = call <8 x bfloat> @llvm.vp.fdiv.v8f16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef) - %V16F16_VP = call <16 x bfloat> @llvm.vp.fdiv.v16f16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef) - - %V1F32_VP = call <1 x float> @llvm.vp.fdiv.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef) - %V2F32_VP = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef) - %V4F32_VP = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) - %V8F32_VP = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) - %V16F32_VP = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef) - - %V1F64_VP = call <1 x double> @llvm.vp.fdiv.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef) - %V2F64_VP = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) - %V4F64_VP = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) - %V8F64_VP = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> undef, <8 x 
double> undef, <8 x i1> undef, i32 undef) - - %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) - - %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fdiv.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fdiv.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fdiv.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) - - %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fdiv.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fdiv.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F64_VP = call <vscale x 4 x double> 
@llvm.vp.fdiv.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fdiv.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) + %F32 = fdiv float poison, poison + %F64 = fdiv double poison, poison + + %V1F32 = fdiv <1 x float> poison, poison + %V2F32 = fdiv <2 x float> poison, poison + %V4F32 = fdiv <4 x float> poison, poison + %V8F32 = fdiv <8 x float> poison, poison + %V16F32 = fdiv <16 x float> poison, poison + + %NXV1F32 = fdiv <vscale x 1 x float> poison, poison + %NXV2F32 = fdiv <vscale x 2 x float> poison, poison + %NXV4F32 = fdiv <vscale x 4 x float> poison, poison + %NXV8F32 = fdiv <vscale x 8 x float> poison, poison + %NXV16F32 = fdiv <vscale x 16 x float> poison, poison + + %V1F64 = fdiv <1 x double> poison, poison + %V2F64 = fdiv <2 x double> poison, poison + %V4F64 = fdiv <4 x double> poison, poison + %V8F64 = fdiv <8 x double> poison, poison + + %NXV1F64 = fdiv <vscale x 1 x double> poison, poison + %NXV2F64 = fdiv <vscale x 2 x double> poison, poison + %NXV4F64 = fdiv <vscale x 4 x double> poison, poison + %NXV8F64 = fdiv <vscale x 8 x double> poison, poison + + %V1F32_VP = call <1 x float> @llvm.vp.fdiv(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) + %V2F32_VP = call <2 x float> @llvm.vp.fdiv(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison) + %V4F32_VP = call <4 x float> @llvm.vp.fdiv(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) + %V8F32_VP = call <8 x float> @llvm.vp.fdiv(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) + %V16F32_VP = call <16 x float> @llvm.vp.fdiv(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison) + + %V1F64_VP = call <1 x double> @llvm.vp.fdiv(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) + %V2F64_VP = call <2 x double> @llvm.vp.fdiv(<2 x double> 
poison, <2 x double> poison, <2 x i1> poison, i32 poison) + %V4F64_VP = call <4 x double> @llvm.vp.fdiv(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) + %V8F64_VP = call <8 x double> @llvm.vp.fdiv(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) + + + %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fdiv(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fdiv(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fdiv(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fdiv(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fdiv(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) + + %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fdiv(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fdiv(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fdiv(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fdiv(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) + + ret void +} + +define void @fdiv_bf16() { +; ZVFH-LABEL: 'fdiv_bf16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fdiv bfloat poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fdiv <1 x bfloat> poison, poison +; ZVFH-NEXT: Cost 
Model: Found an estimated cost of 5 for instruction: %V2BF16 = fdiv <2 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fdiv <4 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fdiv <8 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fdiv <16 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fdiv <32 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fdiv <vscale x 32 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call 
<8 x bfloat> @llvm.vp.fdiv.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fdiv.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fdiv_bf16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fdiv bfloat poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fdiv <1 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fdiv <2 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated 
cost of 5 for instruction: %V4BF16 = fdiv <4 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fdiv <8 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fdiv <16 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fdiv <32 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fdiv <vscale x 32 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fdiv.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 
poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fdiv.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-ZFHMIN-LABEL: 'fdiv_bf16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fdiv bfloat poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fdiv <1 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fdiv <2 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fdiv <4 x bfloat> poison, 
poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fdiv <8 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fdiv <16 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = fdiv <32 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = fdiv <vscale x 32 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fdiv.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> 
@llvm.vp.fdiv.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %BF16 = fdiv bfloat poison, poison + + %V1BF16 = fdiv <1 x bfloat> poison, poison + %V2BF16 = fdiv <2 x bfloat> poison, poison + %V4BF16 = fdiv <4 x bfloat> poison, poison + %V8BF16 = fdiv <8 x bfloat> poison, poison + %V16BF16 = fdiv <16 x bfloat> poison, poison + %V32BF16 = fdiv <32 x bfloat> poison, poison + + %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison + %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison + %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison + %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison + %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison + %NXV32BF16 = fdiv <vscale x 32 x bfloat> poison, poison + + %V1BF16_VP = call <1 x 
bfloat> @llvm.vp.fdiv(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) + %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) + %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) + %V8BF16_VP = call <8 x bfloat> @llvm.vp.fdiv(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) + %V16BF16_VP = call <16 x bfloat> @llvm.vp.fdiv(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) + + %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) ret void } define void @fdiv_f16() { ; ZVFH-LABEL: 'fdiv_f16' -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fdiv <2 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fdiv <32 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> undef, undef -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fdiv.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> 
undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fdiv <2 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fdiv <32 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> poison, poison +; 
ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fdiv.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> 
@llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) ; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; ZVFHMIN-LABEL: 'fdiv_f16' -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fdiv <1 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fdiv <2 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fdiv <4 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fdiv <8 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fdiv <16 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fdiv <32 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef -; ZVFHMIN-NEXT: Cost 
Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> undef, undef -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fdiv.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x 
half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fdiv <1 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fdiv <2 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fdiv <4 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fdiv <8 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fdiv <16 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fdiv <32 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> 
@llvm.vp.fdiv.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, 
i32 poison) ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %F16 = fdiv half undef, undef - - %V1F16 = fdiv <1 x half> undef, undef - %V2F16 = fdiv <2 x half> undef, undef - %V4F16 = fdiv <4 x half> undef, undef - %V8F16 = fdiv <8 x half> undef, undef - %V16F16 = fdiv <16 x half> undef, undef - %V32F16 = fdiv <32 x half> undef, undef - - %NXV1F16 = fdiv <vscale x 1 x half> undef, undef - %NXV2F16 = fdiv <vscale x 2 x half> undef, undef - %NXV4F16 = fdiv <vscale x 4 x half> undef, undef - %NXV8F16 = fdiv <vscale x 8 x half> undef, undef - %NXV16F16 = fdiv <vscale x 16 x half> undef, undef - %NXV32F16 = fdiv <vscale x 32 x half> undef, undef - - %V1F16_VP = call <1 x half> @llvm.vp.fdiv.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef) - %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef) - %V4F16_VP = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef) - %V8F16_VP = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef) - %V16F16_VP = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef) - - %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, 
<vscale x 16 x i1> undef, i32 undef) +; NO-ZFHMIN-LABEL: 'fdiv_f16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fdiv <2 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fdiv <4 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fdiv <8 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fdiv <16 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fdiv <32 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = fdiv <vscale x 1 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = fdiv <vscale x 2 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = fdiv <vscale x 4 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = fdiv <vscale x 8 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = fdiv <vscale x 16 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = fdiv <vscale x 32 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fdiv.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated 
cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %F16 = fdiv half poison, poison + + %V1F16 = fdiv <1 x half> poison, poison + %V2F16 = fdiv <2 x half> poison, poison + %V4F16 = fdiv <4 x half> poison, poison + %V8F16 = fdiv <8 x half> poison, poison + %V16F16 = fdiv <16 x half> poison, poison + %V32F16 = fdiv <32 x half> poison, 
poison + + %NXV1F16 = fdiv <vscale x 1 x half> poison, poison + %NXV2F16 = fdiv <vscale x 2 x half> poison, poison + %NXV4F16 = fdiv <vscale x 4 x half> poison, poison + %NXV8F16 = fdiv <vscale x 8 x half> poison, poison + %NXV16F16 = fdiv <vscale x 16 x half> poison, poison + %NXV32F16 = fdiv <vscale x 32 x half> poison, poison + + %V1F16_VP = call <1 x half> @llvm.vp.fdiv(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) + %V2F16_VP = call <2 x half> @llvm.vp.fdiv(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) + %V4F16_VP = call <4 x half> @llvm.vp.fdiv(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) + %V8F16_VP = call <8 x half> @llvm.vp.fdiv(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) + %V16F16_VP = call <16 x half> @llvm.vp.fdiv(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) + + %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) ret void } define void @frem() { ; CHECK-LABEL: 'frem' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem 
double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = frem <1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = frem <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = frem <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F32 = frem <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F32 = frem <vscale x 1 x float> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F32 = frem <vscale x 2 x float> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F32 = frem 
<vscale x 4 x float> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F32 = frem <vscale x 8 x float> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F32 = frem <vscale x 16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = frem <1 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F64 = frem <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F64 = frem <vscale x 1 x double> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F64 = frem <vscale x 2 x double> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F64 = frem <vscale x 4 x double> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F64 = frem <vscale x 8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8F16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16F16_VP = call <16 x 
bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.frem.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.frem.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.frem.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.frem.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.frem.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.frem.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.frem.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x 
i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.frem.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.frem.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.frem.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.frem.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for 
instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.frem.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.frem.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.frem.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.frem.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = frem <1 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = frem <2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = frem <4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F32 = frem <16 x float> poison, poison +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F32 = frem <vscale x 1 x float> poison, poison +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F32 = frem <vscale x 2 x float> poison, poison +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F32 = frem <vscale x 4 x float> poison, poison +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F32 = frem 
<vscale x 8 x float> poison, poison +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F32 = frem <vscale x 16 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = frem <1 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F64 = frem <8 x double> poison, poison +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F64 = frem <vscale x 1 x double> poison, poison +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F64 = frem <vscale x 2 x double> poison, poison +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F64 = frem <vscale x 4 x double> poison, poison +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F64 = frem <vscale x 8 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.frem.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.frem.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.frem.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> 
poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.frem.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.frem.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.frem.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.frem.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.frem.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.frem.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.frem.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.frem.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F64_VP = call <vscale x 1 x double> 
@llvm.vp.frem.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.frem.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.frem.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.frem.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %BF16 = frem bfloat undef, undef - %F32 = frem float undef, undef - %F64 = frem double undef, undef - - %V1BF16 = frem <1 x bfloat> undef, undef - %V2BF16 = frem <2 x bfloat> undef, undef - %V4BF16 = frem <4 x bfloat> undef, undef - %V8BF16 = frem <8 x bfloat> undef, undef - %V16BF16 = frem <16 x bfloat> undef, undef - - %NXV1BF16 = frem <vscale x 1 x bfloat> undef, undef - %NXV2BF16 = frem <vscale x 2 x bfloat> undef, undef - %NXV4BF16 = frem <vscale x 4 x bfloat> undef, undef - %NXV8BF16 = frem <vscale x 8 x bfloat> undef, undef - %NXV16BF16 = frem <vscale x 16 x bfloat> undef, undef - - %V1F32 = frem <1 x float> undef, undef - %V2F32 = frem <2 x float> undef, undef - %V4F32 = frem <4 x float> undef, undef - %V8F32 = frem <8 x float> undef, undef - %V16F32 = frem <16 x float> undef, undef - - %NXV1F32 = frem <vscale x 1 x float> undef, undef - %NXV2F32 = frem <vscale x 2 x float> undef, undef - %NXV4F32 = frem <vscale x 4 x float> undef, undef - %NXV8F32 = frem <vscale x 8 x float> undef, undef - %NXV16F32 = frem <vscale x 16 x float> undef, undef - - %V1F64 = frem <1 x double> undef, undef - %V2F64 = frem <2 x double> 
undef, undef - %V4F64 = frem <4 x double> undef, undef - %V8F64 = frem <8 x double> undef, undef - - %NXV1F64 = frem <vscale x 1 x double> undef, undef - %NXV2F64 = frem <vscale x 2 x double> undef, undef - %NXV4F64 = frem <vscale x 4 x double> undef, undef - %NXV8F64 = frem <vscale x 8 x double> undef, undef - - %V1F16_VP = call <1 x bfloat> @llvm.vp.frem.v1f16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef) - %V2F16_VP = call <2 x bfloat> @llvm.vp.frem.v2f16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef) - %V4F16_VP = call <4 x bfloat> @llvm.vp.frem.v4f16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef) - %V8F16_VP = call <8 x bfloat> @llvm.vp.frem.v8f16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef) - %V16F16_VP = call <16 x bfloat> @llvm.vp.frem.v16f16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef) - - %V1F32_VP = call <1 x float> @llvm.vp.frem.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef) - %V2F32_VP = call <2 x float> @llvm.vp.frem.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef) - %V4F32_VP = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) - %V8F32_VP = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) - %V16F32_VP = call <16 x float> @llvm.vp.frem.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef) - - %V1F64_VP = call <1 x double> @llvm.vp.frem.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef) - %V2F64_VP = call <2 x double> @llvm.vp.frem.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) - %V4F64_VP = call <4 x double> @llvm.vp.frem.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) - %V8F64_VP = call <8 x double> @llvm.vp.frem.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef) - - %NXV1F16_VP = 
call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1f16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2f16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4f16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8f16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16f16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) - - %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.frem.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.frem.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.frem.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.frem.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) - - %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.frem.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.frem.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.frem.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 
x double> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.frem.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) + %F32 = frem float poison, poison + %F64 = frem double poison, poison + + %V1F32 = frem <1 x float> poison, poison + %V2F32 = frem <2 x float> poison, poison + %V4F32 = frem <4 x float> poison, poison + %V8F32 = frem <8 x float> poison, poison + %V16F32 = frem <16 x float> poison, poison + + %NXV1F32 = frem <vscale x 1 x float> poison, poison + %NXV2F32 = frem <vscale x 2 x float> poison, poison + %NXV4F32 = frem <vscale x 4 x float> poison, poison + %NXV8F32 = frem <vscale x 8 x float> poison, poison + %NXV16F32 = frem <vscale x 16 x float> poison, poison + + %V1F64 = frem <1 x double> poison, poison + %V2F64 = frem <2 x double> poison, poison + %V4F64 = frem <4 x double> poison, poison + %V8F64 = frem <8 x double> poison, poison + + %NXV1F64 = frem <vscale x 1 x double> poison, poison + %NXV2F64 = frem <vscale x 2 x double> poison, poison + %NXV4F64 = frem <vscale x 4 x double> poison, poison + %NXV8F64 = frem <vscale x 8 x double> poison, poison + + %V1F32_VP = call <1 x float> @llvm.vp.frem(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) + %V2F32_VP = call <2 x float> @llvm.vp.frem(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison) + %V4F32_VP = call <4 x float> @llvm.vp.frem(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) + %V8F32_VP = call <8 x float> @llvm.vp.frem(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) + %V16F32_VP = call <16 x float> @llvm.vp.frem(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison) + + %V1F64_VP = call <1 x double> @llvm.vp.frem(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) + %V2F64_VP = call <2 x double> @llvm.vp.frem(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison) + 
%V4F64_VP = call <4 x double> @llvm.vp.frem(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) + %V8F64_VP = call <8 x double> @llvm.vp.frem(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) + + %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.frem(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.frem(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.frem(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.frem(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.frem(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) + + %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.frem(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.frem(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.frem(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.frem(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) + + ret void +} + +define void @frem_bf16() { +; ZVFH-LABEL: 'frem_bf16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = 
frem <2 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost 
of 94 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'frem_bf16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated 
cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> 
@llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-ZFHMIN-LABEL: 'frem_bf16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost 
Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x 
bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %BF16 = frem bfloat poison, poison + + %V1BF16 = frem <1 x bfloat> poison, poison + %V2BF16 = frem <2 x bfloat> poison, poison + %V4BF16 = frem <4 x bfloat> poison, poison + %V8BF16 = frem <8 x bfloat> poison, poison + %V16BF16 = frem <16 x bfloat> poison, poison + %V32BF16 = frem <32 x bfloat> poison, poison + + %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison + %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison + %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison + %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison + %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison + %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison + + %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) + %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) + %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) + %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem(<8 x bfloat> poison, <8 x bfloat> poison, <8 
x i1> poison, i32 poison) + %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) + + %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) ret void } define void @frem_f16() { -; CHECK-LABEL: 'frem_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = frem <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = frem <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = frem <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> undef, undef -; 
CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> undef, undef -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'frem_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = frem <2 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = frem <4 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = frem <8 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> poison, poison +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> poison, poison +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> poison, poison +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> poison, poison +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> poison, poison +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> poison, poison +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> poison, poison +; ZVFH-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.frem.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.frem.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'frem_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = frem <2 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = frem <4 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = frem <8 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 19 for 
instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.frem.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.frem.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %F16 = frem half undef, undef - - %V1F16 = frem <1 x half> undef, undef - %V2F16 = frem <2 x half> undef, undef - %V4F16 = frem <4 x half> undef, undef - %V8F16 = frem <8 x half> undef, undef - %V16F16 = frem <16 x half> undef, undef - %V32F16 = frem <32 x half> undef, undef - - %NXV1F16 = frem <vscale x 1 x half> 
undef, undef - %NXV2F16 = frem <vscale x 2 x half> undef, undef - %NXV4F16 = frem <vscale x 4 x half> undef, undef - %NXV8F16 = frem <vscale x 8 x half> undef, undef - %NXV16F16 = frem <vscale x 16 x half> undef, undef - %NXV32F16 = frem <vscale x 32 x half> undef, undef +; NO-ZFHMIN-LABEL: 'frem_f16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = frem <1 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = frem <2 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = frem <4 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = frem <8 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = frem <16 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = frem <32 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; 
NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.frem.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.frem.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %F16 = frem half poison, poison + + %V1F16 = frem 
<1 x half> poison, poison + %V2F16 = frem <2 x half> poison, poison + %V4F16 = frem <4 x half> poison, poison + %V8F16 = frem <8 x half> poison, poison + %V16F16 = frem <16 x half> poison, poison + %V32F16 = frem <32 x half> poison, poison + + %NXV1F16 = frem <vscale x 1 x half> poison, poison + %NXV2F16 = frem <vscale x 2 x half> poison, poison + %NXV4F16 = frem <vscale x 4 x half> poison, poison + %NXV8F16 = frem <vscale x 8 x half> poison, poison + %NXV16F16 = frem <vscale x 16 x half> poison, poison + %NXV32F16 = frem <vscale x 32 x half> poison, poison + + %V1F16_VP = call <1 x half> @llvm.vp.frem(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) + %V2F16_VP = call <2 x half> @llvm.vp.frem(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) + %V4F16_VP = call <4 x half> @llvm.vp.frem(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) + %V8F16_VP = call <8 x half> @llvm.vp.frem(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) + %V16F16_VP = call <16 x half> @llvm.vp.frem(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) + + %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.frem(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.frem(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) ret void } define void @fneg() { ; CHECK-LABEL: 'fneg' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %BF16 = fneg half undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fneg float undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fneg double undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fneg <1 x bfloat> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2BF16 = fneg <2 x bfloat> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fneg <4 x bfloat> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fneg <8 x bfloat> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fneg <16 x bfloat> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fneg <vscale x 1 x bfloat> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fneg <vscale x 2 x bfloat> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fneg <vscale x 4 x bfloat> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fneg <vscale x 8 x bfloat> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fneg <vscale x 16 x bfloat> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fneg <1 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fneg <2 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <4 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fneg <16 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fneg <vscale x 1 x float> 
undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fneg <vscale x 2 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fneg <vscale x 4 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fneg <vscale x 8 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fneg <vscale x 16 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fneg <1 x double> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fneg <2 x double> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <4 x double> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fneg <8 x double> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fneg <vscale x 1 x double> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fneg <vscale x 2 x double> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fneg <vscale x 4 x double> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fneg <vscale x 8 x double> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg.v1bf16(<1 x bfloat> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16_VP = call <8 x bfloat> 
@llvm.vp.fneg.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fneg.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fneg.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64_VP = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 
2 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF32_VP = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF32_VP = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4BF32_VP = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8BF32_VP = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16BF32_VP = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %NXV1BF64_VP = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2BF64_VP = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4BF64_VP = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8BF64_VP = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV16BF64_VP = call <vscale x 16 x double> @llvm.vp.fneg.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fneg float poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fneg double poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fneg <1 x float> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fneg <2 x float> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <4 x float> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fneg <16 x float> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fneg <vscale x 1 x float> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fneg <vscale x 2 x float> poison +; CHECK-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %NXV4F32 = fneg <vscale x 4 x float> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fneg <vscale x 8 x float> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fneg <vscale x 16 x float> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fneg <1 x double> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fneg <2 x double> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <4 x double> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fneg <8 x double> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fneg <vscale x 1 x double> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fneg <vscale x 2 x double> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fneg <vscale x 4 x double> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fneg <vscale x 8 x double> poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fneg.v1f32(<1 x float> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> 
@llvm.vp.fneg.v16f32(<16 x float> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fneg.v1f64(<1 x double> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64_VP = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV16F64_VP = call <vscale x 16 x double> @llvm.vp.fneg.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %BF16 = fneg half undef - %F32 = fneg float undef - %F64 = fneg double undef - - %V1BF16 = fneg <1 x bfloat> undef - %V2BF16 = fneg <2 x bfloat> undef - %V4BF16 = fneg <4 x bfloat> undef - %V8BF16 = fneg <8 x bfloat> undef - %V16BF16 = fneg <16 x bfloat> undef - - %NXV1BF16 = fneg <vscale x 1 x bfloat> undef - %NXV2BF16 = fneg <vscale x 2 x bfloat> undef - %NXV4BF16 = fneg <vscale x 4 x bfloat> undef - %NXV8BF16 = fneg <vscale x 8 x bfloat> undef - %NXV16BF16 = fneg <vscale x 16 x bfloat> undef - - %V1F32 = fneg <1 x float> undef - %V2F32 = fneg <2 x float> undef - %V4F32 = fneg <4 x float> undef - %V8F32 = fneg <8 x float> undef - %V16F32 = fneg <16 x float> undef - - %NXV1F32 = fneg <vscale x 1 x float> undef - %NXV2F32 = fneg <vscale x 2 x float> undef - %NXV4F32 = fneg <vscale x 4 x float> undef - %NXV8F32 = fneg <vscale x 8 x float> undef - %NXV16F32 = fneg <vscale x 16 x float> undef - - %V1F64 = fneg <1 
x double> undef - %V2F64 = fneg <2 x double> undef - %V4F64 = fneg <4 x double> undef - %V8F64 = fneg <8 x double> undef - - %NXV1F64 = fneg <vscale x 1 x double> undef - %NXV2F64 = fneg <vscale x 2 x double> undef - %NXV4F64 = fneg <vscale x 4 x double> undef - %NXV8F64 = fneg <vscale x 8 x double> undef - - %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg.v1f16(<1 x bfloat> undef, <1 x i1> undef, i32 undef) - %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2f16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) - %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4f16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) - %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg.v8f16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) - %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16f16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) - - %V1F32_VP = call <1 x float> @llvm.vp.fneg.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef) - %V2F32_VP = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) - %V4F32_VP = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) - %V8F32_VP = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) - %V16F32_VP = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) - - %V1F64_VP = call <1 x double> @llvm.vp.fneg.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef) - %V2F64_VP = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) - %V4F64_VP = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) - %V8F64_VP = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) - %V16F64_VP = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) - - %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1f16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2BF16_VP = call <vscale x 2 x bfloat> 
@llvm.vp.fneg.nxv2f16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4f16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8f16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16f16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) - - %NXV1BF32_VP = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2BF32_VP = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4BF32_VP = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8BF32_VP = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16BF32_VP = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) - - %NXV1BF64_VP = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) - %NXV2BF64_VP = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) - %NXV4BF64_VP = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) - %NXV8BF64_VP = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) - %NXV16BF64_VP = call <vscale x 16 x double> @llvm.vp.fneg.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef) + %F32 = fneg float poison + %F64 = fneg double poison + + %V1F32 = fneg <1 x float> poison + %V2F32 = fneg <2 x float> poison + %V4F32 = fneg <4 x float> poison + %V8F32 = fneg <8 x float> poison + 
%V16F32 = fneg <16 x float> poison + + %NXV1F32 = fneg <vscale x 1 x float> poison + %NXV2F32 = fneg <vscale x 2 x float> poison + %NXV4F32 = fneg <vscale x 4 x float> poison + %NXV8F32 = fneg <vscale x 8 x float> poison + %NXV16F32 = fneg <vscale x 16 x float> poison + + %V1F64 = fneg <1 x double> poison + %V2F64 = fneg <2 x double> poison + %V4F64 = fneg <4 x double> poison + %V8F64 = fneg <8 x double> poison + + %NXV1F64 = fneg <vscale x 1 x double> poison + %NXV2F64 = fneg <vscale x 2 x double> poison + %NXV4F64 = fneg <vscale x 4 x double> poison + %NXV8F64 = fneg <vscale x 8 x double> poison + + %V1F32_VP = call <1 x float> @llvm.vp.fneg(<1 x float> poison, <1 x i1> poison, i32 poison) + %V2F32_VP = call <2 x float> @llvm.vp.fneg(<2 x float> poison, <2 x i1> poison, i32 poison) + %V4F32_VP = call <4 x float> @llvm.vp.fneg(<4 x float> poison, <4 x i1> poison, i32 poison) + %V8F32_VP = call <8 x float> @llvm.vp.fneg(<8 x float> poison, <8 x i1> poison, i32 poison) + %V16F32_VP = call <16 x float> @llvm.vp.fneg(<16 x float> poison, <16 x i1> poison, i32 poison) + + %V1F64_VP = call <1 x double> @llvm.vp.fneg(<1 x double> poison, <1 x i1> poison, i32 poison) + %V2F64_VP = call <2 x double> @llvm.vp.fneg(<2 x double> poison, <2 x i1> poison, i32 poison) + %V4F64_VP = call <4 x double> @llvm.vp.fneg(<4 x double> poison, <4 x i1> poison, i32 poison) + %V8F64_VP = call <8 x double> @llvm.vp.fneg(<8 x double> poison, <8 x i1> poison, i32 poison) + %V16F64_VP = call <16 x double> @llvm.vp.fneg(<16 x double> poison, <16 x i1> poison, i32 poison) + + %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fneg(<vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fneg(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fneg(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F32_VP = call <vscale x 8 x float> 
@llvm.vp.fneg(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fneg(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) + + %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fneg(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fneg(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fneg(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fneg(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16F64_VP = call <vscale x 16 x double> @llvm.vp.fneg(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison) + + ret void +} + +define void @fneg_bf16() { +; ZVFH-LABEL: 'fneg_bf16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fneg bfloat poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fneg <1 x bfloat> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2BF16 = fneg <2 x bfloat> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fneg <4 x bfloat> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fneg <8 x bfloat> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fneg <16 x bfloat> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32BF16 = fneg <32 x bfloat> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fneg <vscale x 1 x bfloat> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fneg <vscale x 2 x bfloat> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fneg <vscale x 4 x 
bfloat> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fneg <vscale x 8 x bfloat> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fneg <vscale x 16 x bfloat> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fneg <vscale x 32 x bfloat> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg.v1bf16(<1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> 
@llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fneg_bf16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fneg bfloat poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fneg <1 x bfloat> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2BF16 = fneg <2 x bfloat> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fneg <4 x bfloat> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fneg <8 x bfloat> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fneg <16 x bfloat> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32BF16 = fneg <32 x bfloat> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fneg <vscale x 1 x bfloat> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fneg <vscale x 2 x bfloat> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fneg <vscale x 4 x bfloat> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fneg <vscale x 8 x bfloat> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fneg <vscale x 16 x bfloat> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fneg <vscale x 32 x bfloat> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = 
call <1 x bfloat> @llvm.vp.fneg.v1bf16(<1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-ZFHMIN-LABEL: 'fneg_bf16' +; NO-ZFHMIN-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %BF16 = fneg bfloat poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fneg <1 x bfloat> poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fneg <2 x bfloat> poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fneg <4 x bfloat> poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fneg <8 x bfloat> poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fneg <16 x bfloat> poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = fneg <32 x bfloat> poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = fneg <vscale x 1 x bfloat> poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = fneg <vscale x 2 x bfloat> poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = fneg <vscale x 4 x bfloat> poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = fneg <vscale x 8 x bfloat> poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = fneg <vscale x 16 x bfloat> poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = fneg <vscale x 32 x bfloat> poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg.v1bf16(<1 x bfloat> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %BF16 = fneg bfloat poison + + %V1BF16 = fneg <1 x bfloat> poison + %V2BF16 = fneg <2 x bfloat> poison + %V4BF16 = fneg <4 x bfloat> poison + %V8BF16 = fneg <8 x bfloat> poison + %V16BF16 = fneg <16 x bfloat> poison + %V32BF16 = fneg <32 x bfloat> poison + + %NXV1BF16 = fneg <vscale x 1 x bfloat> poison + %NXV2BF16 = fneg <vscale x 2 x bfloat> poison + %NXV4BF16 = fneg <vscale x 4 x bfloat> poison + %NXV8BF16 = fneg <vscale x 8 x bfloat> poison + %NXV16BF16 = fneg <vscale x 16 x bfloat> poison + %NXV32BF16 = fneg <vscale x 32 x bfloat> poison + + %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg(<1 x bfloat> poison, <1 x i1> 
poison, i32 poison) + %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg(<2 x bfloat> poison, <2 x i1> poison, i32 poison) + %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg(<4 x bfloat> poison, <4 x i1> poison, i32 poison) + %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg(<8 x bfloat> poison, <8 x i1> poison, i32 poison) + %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg(<16 x bfloat> poison, <16 x i1> poison, i32 poison) + + %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg(<vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) ret void } define void @fneg_f16() { -; CHECK-LABEL: 'fneg_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fneg <32 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fneg <vscale x 1 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %NXV2F16 = fneg <vscale x 2 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fneg <vscale x 4 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fneg <vscale x 8 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fneg <vscale x 16 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fneg <vscale x 32 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'fneg_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fneg <32 x half> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fneg <vscale x 1 x half> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fneg <vscale x 2 x half> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fneg <vscale x 4 x half> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fneg <vscale x 8 x half> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fneg <vscale x 16 x half> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 
for instruction: %NXV32F16 = fneg <vscale x 32 x half> poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fneg.v1f16(<1 x half> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; 
ZVFHMIN-LABEL: 'fneg_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fneg <32 x half> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fneg <vscale x 1 x half> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fneg <vscale x 2 x half> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fneg <vscale x 4 x half> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fneg <vscale x 8 x half> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fneg <vscale x 16 x half> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fneg <vscale x 32 x half> poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fneg.v1f16(<1 x half> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison) +; 
ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %F16 = fneg half undef - - %V1F16 = fneg <1 x half> undef - %V2F16 = fneg <2 x half> undef - %V4F16 = fneg <4 x half> undef - %V8F16 = fneg <8 x half> undef - %V16F16 = fneg <16 x half> undef - %V32F16 = fneg <32 x half> undef - - %NXV1F16 = fneg <vscale x 1 x half> undef - %NXV2F16 = fneg <vscale x 2 x half> undef - %NXV4F16 = fneg <vscale x 4 x half> undef - %NXV8F16 = fneg <vscale x 8 x half> undef - %NXV16F16 = fneg <vscale x 16 x half> undef - %NXV32F16 = fneg <vscale x 32 x half> undef +; NO-ZFHMIN-LABEL: 'fneg_f16' +; NO-ZFHMIN-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %F16 = fneg half poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fneg <2 x half> poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fneg <4 x half> poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fneg <8 x half> poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fneg <16 x half> poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fneg <32 x half> poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = fneg <vscale x 1 x half> poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = fneg <vscale x 2 x half> poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = fneg <vscale x 4 x half> poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = fneg <vscale x 8 x half> poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = fneg <vscale x 16 x half> poison +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = fneg <vscale x 32 x half> poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fneg.v1f16(<1 x half> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> 
@llvm.vp.fneg.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %F16 = fneg half poison + + %V1F16 = fneg <1 x half> poison + %V2F16 = fneg <2 x half> poison + %V4F16 = fneg <4 x half> poison + %V8F16 = fneg <8 x half> poison + %V16F16 = fneg <16 x half> poison + %V32F16 = fneg <32 x half> poison + + %NXV1F16 = fneg <vscale x 1 x half> poison + %NXV2F16 = fneg <vscale x 2 x half> poison + %NXV4F16 = fneg <vscale x 4 x half> poison + %NXV8F16 = fneg <vscale x 8 x half> poison + %NXV16F16 = fneg <vscale x 16 x half> poison + %NXV32F16 = fneg <vscale x 32 x half> poison + + %V1F16_VP = call <1 x half> @llvm.vp.fneg(<1 x half> poison, <1 x i1> poison, i32 poison) + %V2F16_VP = call <2 x half> @llvm.vp.fneg(<2 x half> poison, <2 x i1> poison, i32 poison) + %V4F16_VP = call 
<4 x half> @llvm.vp.fneg(<4 x half> poison, <4 x i1> poison, i32 poison) + %V8F16_VP = call <8 x half> @llvm.vp.fneg(<8 x half> poison, <8 x i1> poison, i32 poison) + %V16F16_VP = call <16 x half> @llvm.vp.fneg(<16 x half> poison, <16 x i1> poison, i32 poison) + + %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fneg(<vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) + %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fneg(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) + %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fneg(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) + %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fneg(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) + %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fneg(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) ret void } define void @fcopysign() { ; CHECK-LABEL: 'fcopysign' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> 
undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16BF16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.copysign.v1f32(<1 x float> undef, <1 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> undef, <2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = call <vscale x 1 x float> 
@llvm.copysign.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = call <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F32 = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F32 = call <vscale x 8 x float> @llvm.copysign.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F32 = call <vscale x 16 x float> @llvm.copysign.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.copysign.v1f64(<1 x double> undef, <1 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = call <vscale x 1 x double> @llvm.copysign.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F64 = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F64 = call <vscale x 4 x double> 
@llvm.copysign.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F64 = call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float poison, float poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double poison, double poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.copysign.v1f32(<1 x float> poison, <1 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> poison, <2 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> poison, <4 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> poison, <8 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> poison, <16 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = call <vscale x 1 x float> @llvm.copysign.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = call <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F32 = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %NXV8F32 = call <vscale x 8 x float> @llvm.copysign.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F32 = call <vscale x 16 x float> @llvm.copysign.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.copysign.v1f64(<1 x double> poison, <1 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> poison, <2 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> poison, <4 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> poison, <8 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = call <vscale x 1 x double> @llvm.copysign.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F64 = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F64 = call <vscale x 4 x double> @llvm.copysign.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F64 = call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %BF16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef) - %F32 = call float @llvm.copysign.f32(float undef, float undef) - %F64 = call double 
@llvm.copysign.f64(double undef, double undef) - - %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef) - %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef) - %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef) - %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef) - %V16BF16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef) - - %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef) - %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef) - %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef) - %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef) - %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef) - - %V1F32 = call <1 x float> @llvm.copysign.v1f32(<1 x float> undef, <1 x float> undef) - %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> undef, <2 x float> undef) - %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) - %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) - %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) - - %NXV1F32 = call <vscale x 1 x float> @llvm.copysign.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef) - %NXV2F32 = call <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef) - %NXV4F32 = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef) - %NXV8F32 = call <vscale x 8 x float> 
@llvm.copysign.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef) - %NXV16F32 = call <vscale x 16 x float> @llvm.copysign.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef) - - %V1F64 = call <1 x double> @llvm.copysign.v1f64(<1 x double> undef, <1 x double> undef) - %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) - %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) - %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) - - %NXV1F64 = call <vscale x 1 x double> @llvm.copysign.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef) - %NXV2F64 = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef) - %NXV4F64 = call <vscale x 4 x double> @llvm.copysign.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef) - %NXV8F64 = call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef) + %F32 = call float @llvm.copysign.f32(float poison, float poison) + %F64 = call double @llvm.copysign.f64(double poison, double poison) + + %V1F32 = call <1 x float> @llvm.copysign(<1 x float> poison, <1 x float> poison) + %V2F32 = call <2 x float> @llvm.copysign(<2 x float> poison, <2 x float> poison) + %V4F32 = call <4 x float> @llvm.copysign(<4 x float> poison, <4 x float> poison) + %V8F32 = call <8 x float> @llvm.copysign(<8 x float> poison, <8 x float> poison) + %V16F32 = call <16 x float> @llvm.copysign(<16 x float> poison, <16 x float> poison) + + %NXV1F32 = call <vscale x 1 x float> @llvm.copysign(<vscale x 1 x float> poison, <vscale x 1 x float> poison) + %NXV2F32 = call <vscale x 2 x float> @llvm.copysign(<vscale x 2 x float> poison, <vscale x 2 x float> poison) + %NXV4F32 = call <vscale x 4 x float> @llvm.copysign(<vscale x 4 x float> poison, <vscale x 4 x float> poison) + %NXV8F32 = call <vscale x 8 x float> 
@llvm.copysign(<vscale x 8 x float> poison, <vscale x 8 x float> poison) + %NXV16F32 = call <vscale x 16 x float> @llvm.copysign(<vscale x 16 x float> poison, <vscale x 16 x float> poison) + + %V1F64 = call <1 x double> @llvm.copysign(<1 x double> poison, <1 x double> poison) + %V2F64 = call <2 x double> @llvm.copysign(<2 x double> poison, <2 x double> poison) + %V4F64 = call <4 x double> @llvm.copysign(<4 x double> poison, <4 x double> poison) + %V8F64 = call <8 x double> @llvm.copysign(<8 x double> poison, <8 x double> poison) + + %NXV1F64 = call <vscale x 1 x double> @llvm.copysign(<vscale x 1 x double> poison, <vscale x 1 x double> poison) + %NXV2F64 = call <vscale x 2 x double> @llvm.copysign(<vscale x 2 x double> poison, <vscale x 2 x double> poison) + %NXV4F64 = call <vscale x 4 x double> @llvm.copysign(<vscale x 4 x double> poison, <vscale x 4 x double> poison) + %NXV8F64 = call <vscale x 8 x double> @llvm.copysign(<vscale x 8 x double> poison, <vscale x 8 x double> poison) + + ret void +} + +define void @fcopysign_bf16() { +; ZVFH-LABEL: 'fcopysign_bf16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.copysign.bf16(bfloat poison, bfloat poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16BF16 = call <16 x bfloat> 
@llvm.copysign.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32BF16 = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fcopysign_bf16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.copysign.bf16(bfloat poison, bfloat poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: 
%V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16BF16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32BF16 = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-ZFHMIN-LABEL: 'fcopysign_bf16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.copysign.bf16(bfloat poison, bfloat poison) +; NO-ZFHMIN-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: 
%NXV32BF16 = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %BF16 = call bfloat @llvm.copysign(bfloat poison, bfloat poison) + + %V1BF16 = call <1 x bfloat> @llvm.copysign(<1 x bfloat> poison, <1 x bfloat> poison) + %V2BF16 = call <2 x bfloat> @llvm.copysign(<2 x bfloat> poison, <2 x bfloat> poison) + %V4BF16 = call <4 x bfloat> @llvm.copysign(<4 x bfloat> poison, <4 x bfloat> poison) + %V8BF16 = call <8 x bfloat> @llvm.copysign(<8 x bfloat> poison, <8 x bfloat> poison) + %V16BF16 = call <16 x bfloat> @llvm.copysign(<16 x bfloat> poison, <16 x bfloat> poison) + %V32BF16 = call <32 x bfloat> @llvm.copysign(<32 x bfloat> poison, <32 x bfloat> poison) + + %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison) + %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) + %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) + %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) + %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) + %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.copysign(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison) ret void } define void @fcopysign_f16() { ; ZVFH-LABEL: 'fcopysign_f16' -; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half undef, half undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> undef, <1 x half> undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> 
@llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> undef, <32 x half> undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half poison, half poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> poison, <1 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> poison, <2 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> poison, <4 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> poison, <8 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> poison, <16 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> poison, <32 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> poison, <vscale x 
32 x half> poison) ; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; ZVFHMIN-LABEL: 'fcopysign_f16' -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half undef, half undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> undef, <1 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> undef, <32 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = call <vscale x 
16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half poison, half poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> poison, <1 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> poison, <2 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> poison, <4 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> poison, <8 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> poison, <16 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> poison, <32 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = call <vscale x 8 x half> 
@llvm.copysign.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison) ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %F16 = call half @llvm.copysign.f16(half undef, half undef) - - %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> undef, <1 x half> undef) - %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef) - %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef) - %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef) - %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef) - %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> undef, <32 x half> undef) - - %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef) - %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef) - %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef) - %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef) - %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef) - %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef) +; NO-ZFHMIN-LABEL: 'fcopysign_f16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half poison, half poison) +; NO-ZFHMIN-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> poison, <1 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> poison, <2 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> poison, <4 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> poison, <8 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> poison, <16 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> poison, <32 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> poison, 
<vscale x 32 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %F16 = call half @llvm.copysign(half poison, half poison) + + %V1F16 = call <1 x half> @llvm.copysign(<1 x half> poison, <1 x half> poison) + %V2F16 = call <2 x half> @llvm.copysign(<2 x half> poison, <2 x half> poison) + %V4F16 = call <4 x half> @llvm.copysign(<4 x half> poison, <4 x half> poison) + %V8F16 = call <8 x half> @llvm.copysign(<8 x half> poison, <8 x half> poison) + %V16F16 = call <16 x half> @llvm.copysign(<16 x half> poison, <16 x half> poison) + %V32F16 = call <32 x half> @llvm.copysign(<32 x half> poison, <32 x half> poison) + + %NXV1F16 = call <vscale x 1 x half> @llvm.copysign(<vscale x 1 x half> poison, <vscale x 1 x half> poison) + %NXV2F16 = call <vscale x 2 x half> @llvm.copysign(<vscale x 2 x half> poison, <vscale x 2 x half> poison) + %NXV4F16 = call <vscale x 4 x half> @llvm.copysign(<vscale x 4 x half> poison, <vscale x 4 x half> poison) + %NXV8F16 = call <vscale x 8 x half> @llvm.copysign(<vscale x 8 x half> poison, <vscale x 8 x half> poison) + %NXV16F16 = call <vscale x 16 x half> @llvm.copysign(<vscale x 16 x half> poison, <vscale x 16 x half> poison) + %NXV32F16 = call <vscale x 32 x half> @llvm.copysign(<vscale x 32 x half> poison, <vscale x 32 x half> poison) ret void } define void @fma() { ; CHECK-LABEL: 'fma' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.fma.bf16(bfloat undef, bfloat undef, bfloat undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x bfloat> 
undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = call <1 x float> @llvm.fma.v1f32(<1 x float> undef, <1 x float> undef, <1 x float> undef) -; 
CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = call <vscale x 1 x float> @llvm.fma.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F32 = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F32 = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F32 = call <vscale x 16 x float> @llvm.fma.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.fma.v1f64(<1 x double> undef, <1 x double> undef, <1 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = call <vscale x 1 x double> @llvm.fma.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F64 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F64 = call <vscale x 8 x double> @llvm.fma.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float poison, float poison, float poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double poison, double poison, double poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = call <1 x float> @llvm.fma.v1f32(<1 x float> poison, <1 x float> poison, <1 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> poison, <2 x float> poison, <2 x float> poison) +; CHECK-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> poison, <4 x float> poison, <4 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> poison, <8 x float> poison, <8 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> poison, <16 x float> poison, <16 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = call <vscale x 1 x float> @llvm.fma.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F32 = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F32 = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F32 = call <vscale x 16 x float> @llvm.fma.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.fma.v1f64(<1 x double> poison, <1 x double> poison, <1 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> poison, <2 x double> poison, <2 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> poison, <4 x double> poison, <4 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> poison, <8 x double> poison, <8 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = call <vscale x 1 x double> @llvm.fma.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F64 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F64 = call <vscale x 8 x double> @llvm.fma.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x double> poison) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %BF16 = call bfloat @llvm.fma.bf16(bfloat undef, bfloat undef, bfloat undef) - %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) - %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) - - %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x bfloat> undef) - %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x bfloat> undef) - %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x bfloat> undef) - %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x bfloat> undef) - %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> 
undef, <16 x bfloat> undef, <16 x bfloat> undef) - - %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef) - %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef) - %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef) - %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef) - %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef) - - %V1F32 = call <1 x float> @llvm.fma.v1f32(<1 x float> undef, <1 x float> undef, <1 x float> undef) - %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) - %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) - %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) - %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) - - %NXV1F32 = call <vscale x 1 x float> @llvm.fma.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x float> undef) - %NXV2F32 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef) - %NXV4F32 = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef) - %NXV8F32 = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef) - %NXV16F32 = call <vscale x 16 x float> @llvm.fma.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 
16 x float> undef) - - %V1F64 = call <1 x double> @llvm.fma.v1f64(<1 x double> undef, <1 x double> undef, <1 x double> undef) - %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) - %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) - %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) - - %NXV1F64 = call <vscale x 1 x double> @llvm.fma.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x double> undef) - %NXV2F64 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef) - %NXV4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef) - %NXV8F64 = call <vscale x 8 x double> @llvm.fma.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef) + %F32 = call float @llvm.fma(float poison, float poison, float poison) + %F64 = call double @llvm.fma(double poison, double poison, double poison) + + %V1F32 = call <1 x float> @llvm.fma(<1 x float> poison, <1 x float> poison, <1 x float> poison) + %V2F32 = call <2 x float> @llvm.fma(<2 x float> poison, <2 x float> poison, <2 x float> poison) + %V4F32 = call <4 x float> @llvm.fma(<4 x float> poison, <4 x float> poison, <4 x float> poison) + %V8F32 = call <8 x float> @llvm.fma(<8 x float> poison, <8 x float> poison, <8 x float> poison) + %V16F32 = call <16 x float> @llvm.fma(<16 x float> poison, <16 x float> poison, <16 x float> poison) + + %NXV1F32 = call <vscale x 1 x float> @llvm.fma(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x float> poison) + %NXV2F32 = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison) + %NXV4F32 = call <vscale x 4 x float> @llvm.fma(<vscale 
x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison) + %NXV8F32 = call <vscale x 8 x float> @llvm.fma(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison) + %NXV16F32 = call <vscale x 16 x float> @llvm.fma(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x float> poison) + + %V1F64 = call <1 x double> @llvm.fma(<1 x double> poison, <1 x double> poison, <1 x double> poison) + %V2F64 = call <2 x double> @llvm.fma(<2 x double> poison, <2 x double> poison, <2 x double> poison) + %V4F64 = call <4 x double> @llvm.fma(<4 x double> poison, <4 x double> poison, <4 x double> poison) + %V8F64 = call <8 x double> @llvm.fma(<8 x double> poison, <8 x double> poison, <8 x double> poison) + + %NXV1F64 = call <vscale x 1 x double> @llvm.fma(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x double> poison) + %NXV2F64 = call <vscale x 2 x double> @llvm.fma(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison) + %NXV4F64 = call <vscale x 4 x double> @llvm.fma(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison) + %NXV8F64 = call <vscale x 8 x double> @llvm.fma(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x double> poison) + + ret void +} + +define void @fma_bf16() { +; ZVFH-LABEL: 'fma_bf16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.fma.bf16(bfloat poison, bfloat poison, bfloat poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4BF16 = call 
<4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32BF16 = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison, <32 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.fma.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison) +; ZVFH-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fma_bf16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.fma.bf16(bfloat poison, bfloat poison, bfloat poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32BF16 = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison, <32 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, 
<vscale x 4 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.fma.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-ZFHMIN-LABEL: 'fma_bf16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.fma.bf16(bfloat poison, bfloat poison, bfloat poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: 
%V32BF16 = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison, <32 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.fma.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %BF16 = call bfloat @llvm.fma(bfloat poison, bfloat poison, bfloat poison) + + %V1BF16 = call <1 x bfloat> @llvm.fma(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison) + %V2BF16 = call <2 x bfloat> @llvm.fma(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison) + %V4BF16 = call <4 x bfloat> @llvm.fma(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison) + %V8BF16 = call <8 x bfloat> @llvm.fma(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison) + %V16BF16 = call <16 x bfloat> 
@llvm.fma(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison) + %V32BF16 = call <32 x bfloat> @llvm.fma(<32 x bfloat> poison, <32 x bfloat> poison, <32 x bfloat> poison) + + %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison) + %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) + %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) + %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) + %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) + %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.fma(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison) ret void } define void @fma_f16() { -; CHECK-LABEL: 'fma_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half undef, half undef, half undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> undef, <1 x half> undef, <1 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = 
call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> undef, <32 x half> undef, <32 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef, <vscale x 32 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'fma_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half poison, half poison, half poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> poison, <1 x half> poison, <1 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> poison, <32 x half> poison, <32 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale 
x 32 x half> poison, <vscale x 32 x half> poison, <vscale x 32 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fma_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half poison, half poison, half poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> poison, <1 x half> poison, <1 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> poison, <32 x half> poison, <32 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, 
<vscale x 4 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison, <vscale x 32 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-ZFHMIN-LABEL: 'fma_f16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half poison, half poison, half poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> poison, <1 x half> poison, <1 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> poison, <32 x half> poison, <32 x half> 
poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison, <vscale x 32 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %F16 = call half @llvm.fma.f16(half undef, half undef, half undef) - - %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> undef, <1 x half> undef, <1 x half> undef) - %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) - %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) - %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef) - %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef) - %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> undef, <32 x half> undef, <32 x half> undef) - - %NXV1F16 = call <vscale x 1 
x half> @llvm.fma.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef) - %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef) - %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef) - %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef) - %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef) - %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef, <vscale x 32 x half> undef) + %F16 = call half @llvm.fma(half poison, half poison, half poison) + + %V1F16 = call <1 x half> @llvm.fma(<1 x half> poison, <1 x half> poison, <1 x half> poison) + %V2F16 = call <2 x half> @llvm.fma(<2 x half> poison, <2 x half> poison, <2 x half> poison) + %V4F16 = call <4 x half> @llvm.fma(<4 x half> poison, <4 x half> poison, <4 x half> poison) + %V8F16 = call <8 x half> @llvm.fma(<8 x half> poison, <8 x half> poison, <8 x half> poison) + %V16F16 = call <16 x half> @llvm.fma(<16 x half> poison, <16 x half> poison, <16 x half> poison) + %V32F16 = call <32 x half> @llvm.fma(<32 x half> poison, <32 x half> poison, <32 x half> poison) + + %NXV1F16 = call <vscale x 1 x half> @llvm.fma(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison) + %NXV2F16 = call <vscale x 2 x half> @llvm.fma(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison) + %NXV4F16 = call <vscale x 4 x half> @llvm.fma(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison) + %NXV8F16 = call <vscale x 8 x half> @llvm.fma(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison) + %NXV16F16 = call 
<vscale x 16 x half> @llvm.fma(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison) + %NXV32F16 = call <vscale x 32 x half> @llvm.fma(<vscale x 32 x half> poison, <vscale x 32 x half> poison, <vscale x 32 x half> poison) ret void } define void @fmuladd() { ; CHECK-LABEL: 'fmuladd' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fmuladd.bf16(bfloat undef, bfloat undef, bfloat undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call float @llvm.fmuladd.f32(float undef, float undef, float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call double @llvm.fmuladd.f64(double undef, double undef, double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; 
CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.fmuladd.v16f64(<16 x double> undef, <16 x double> undef, <16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef) -; CHECK-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %21 = call <vscale x 1 x float> @llvm.fmuladd.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %24 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %25 = call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x double> @llvm.fmuladd.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %28 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %29 = call <vscale x 8 x double> @llvm.fmuladd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %30 = call <vscale x 16 x 
double> @llvm.fmuladd.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call float @llvm.fmuladd.f32(float poison, float poison, float poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call double @llvm.fmuladd.f64(double poison, double poison, double poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> poison, <2 x float> poison, <2 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> poison, <4 x float> poison, <4 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> poison, <8 x float> poison, <8 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> poison, <16 x float> poison, <16 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> poison, <2 x double> poison, <2 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> poison, <4 x double> poison, <4 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> poison, <8 x double> poison, <8 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %10 = call <16 x double> @llvm.fmuladd.v16f64(<16 x double> poison, <16 x double> poison, <16 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <vscale x 1 x float> @llvm.fmuladd.nxv1f32(<vscale x 1 x float> 
poison, <vscale x 1 x float> poison, <vscale x 1 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %15 = call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x float> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.fmuladd.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.fmuladd.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x double> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %20 = call <vscale x 16 x double> @llvm.fmuladd.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x double> poison, <vscale 
x 16 x double> poison) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - call bfloat @llvm.fmuladd.bf16(bfloat undef, bfloat undef, bfloat undef) - call float @llvm.fmuladd.f32(float undef, float undef, float undef) - call double @llvm.fmuladd.f64(double undef, double undef, double undef) - call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x bfloat> undef) - call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x bfloat> undef) - call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x bfloat> undef) - call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x bfloat> undef) - call <2 x float> @llvm.fmuladd.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) - call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) - call <8 x float> @llvm.fmuladd.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) - call <16 x float> @llvm.fmuladd.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) - call <2 x double> @llvm.fmuladd.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) - call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) - call <8 x double> @llvm.fmuladd.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) - call <16 x double> @llvm.fmuladd.v16f64(<16 x double> undef, <16 x double> undef, <16 x double> undef) - call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef) - call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef) - call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef) - call <vscale x 8 x bfloat> 
@llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef) - call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef) - call <vscale x 1 x float> @llvm.fmuladd.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x float> undef) - call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef) - call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef) - call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef) - call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x float> undef) - call <vscale x 1 x double> @llvm.fmuladd.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x double> undef) - call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef) - call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef) - call <vscale x 8 x double> @llvm.fmuladd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef) - call <vscale x 16 x double> @llvm.fmuladd.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x double> undef) + call float @llvm.fmuladd(float poison, float poison, float poison) + call double @llvm.fmuladd(double poison, double poison, double poison) + + call <2 x float> @llvm.fmuladd(<2 x float> poison, <2 x float> poison, <2 x float> poison) + call <4 x float> @llvm.fmuladd(<4 x float> poison, <4 x float> poison, <4 x float> poison) + call <8 x float> @llvm.fmuladd(<8 x float> 
poison, <8 x float> poison, <8 x float> poison) + call <16 x float> @llvm.fmuladd(<16 x float> poison, <16 x float> poison, <16 x float> poison) + + call <2 x double> @llvm.fmuladd(<2 x double> poison, <2 x double> poison, <2 x double> poison) + call <4 x double> @llvm.fmuladd(<4 x double> poison, <4 x double> poison, <4 x double> poison) + call <8 x double> @llvm.fmuladd(<8 x double> poison, <8 x double> poison, <8 x double> poison) + call <16 x double> @llvm.fmuladd(<16 x double> poison, <16 x double> poison, <16 x double> poison) + + call <vscale x 1 x float> @llvm.fmuladd(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x float> poison) + call <vscale x 2 x float> @llvm.fmuladd(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison) + call <vscale x 4 x float> @llvm.fmuladd(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison) + call <vscale x 8 x float> @llvm.fmuladd(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison) + call <vscale x 16 x float> @llvm.fmuladd(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x float> poison) + + call <vscale x 1 x double> @llvm.fmuladd(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x double> poison) + call <vscale x 2 x double> @llvm.fmuladd(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison) + call <vscale x 4 x double> @llvm.fmuladd(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison) + call <vscale x 8 x double> @llvm.fmuladd(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x double> poison) + call <vscale x 16 x double> @llvm.fmuladd(<vscale x 16 x double> poison, <vscale x 16 x double> poison, <vscale x 16 x double> poison) + + ret void +} + +define void @fmuladd_bf16() { +; ZVFH-LABEL: 'fmuladd_bf16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %1 = call bfloat @llvm.fmuladd.bf16(bfloat poison, bfloat poison, bfloat poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; 
ZVFHMIN-LABEL: 'fmuladd_bf16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fmuladd.bf16(bfloat poison, bfloat poison, bfloat poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, 
<vscale x 16 x bfloat> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-ZFHMIN-LABEL: 'fmuladd_bf16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fmuladd.bf16(bfloat poison, bfloat poison, bfloat poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x bfloat> 
@llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call bfloat @llvm.fmuladd(bfloat poison, bfloat poison, bfloat poison) + + call <2 x bfloat> @llvm.fmuladd(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison) + call <4 x bfloat> @llvm.fmuladd(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison) + call <8 x bfloat> @llvm.fmuladd(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison) + call <16 x bfloat> @llvm.fmuladd(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison) + + call <vscale x 1 x bfloat> @llvm.fmuladd(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison) + call <vscale x 2 x bfloat> @llvm.fmuladd(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) + call <vscale x 4 x bfloat> @llvm.fmuladd(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) + call <vscale x 8 x bfloat> @llvm.fmuladd(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) + call <vscale x 16 x bfloat> @llvm.fmuladd(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) ret void } define void @fmuladd_f16() { -; CHECK-LABEL: 'fmuladd_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half undef, half undef, half undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> 
@llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'fmuladd_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half poison, half poison, half poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x 
half> poison, <8 x half> poison, <8 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fmuladd_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half poison, half poison, half poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> 
@llvm.fmuladd.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-ZFHMIN-LABEL: 'fmuladd_f16' +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half poison, half poison, half poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 
for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - call half @llvm.fmuladd.f16(half undef, half undef, half undef) - call <2 x half> @llvm.fmuladd.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) - call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) - call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef) - call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef) - call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef) - call <vscale x 2 x half> 
@llvm.fmuladd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef) - call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef) - call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef) - call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef) + call half @llvm.fmuladd(half poison, half poison, half poison) + + call <2 x half> @llvm.fmuladd(<2 x half> poison, <2 x half> poison, <2 x half> poison) + call <4 x half> @llvm.fmuladd(<4 x half> poison, <4 x half> poison, <4 x half> poison) + call <8 x half> @llvm.fmuladd(<8 x half> poison, <8 x half> poison, <8 x half> poison) + call <16 x half> @llvm.fmuladd(<16 x half> poison, <16 x half> poison, <16 x half> poison) + + call <vscale x 1 x half> @llvm.fmuladd(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison) + call <vscale x 2 x half> @llvm.fmuladd(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison) + call <vscale x 4 x half> @llvm.fmuladd(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison) + call <vscale x 8 x half> @llvm.fmuladd(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison) + call <vscale x 16 x half> @llvm.fmuladd(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison) ret void } diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll index ece528d..e3305c0 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll @@ -1,7 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; Check getShuffleCost for scalable vector -; RUN: opt 
-passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s --check-prefixes=CHECK,ARGBASED +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+m,+v -intrinsic-cost-strategy=type-based-intrinsic-cost < %s | FileCheck %s --check-prefixes=CHECK,TYPEBASED ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+m,+v -cost-kind=code-size < %s | FileCheck %s --check-prefix=SIZE define void @vector_broadcast() { @@ -51,12 +52,19 @@ define void @vector_broadcast() { } define void @vector_insert_extract(<vscale x 4 x i32> %v0, <vscale x 16 x i32> %v1, <16 x i32> %v2) { -; CHECK-LABEL: 'vector_insert_extract' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %extract_fixed_from_scalable = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %v0, i64 0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert_fixed_into_scalable = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %v0, <16 x i32> %v2, i64 0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_scalable_from_scalable = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %v1, i64 0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_scalable_into_scalable = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v1, <vscale x 4 x i32> %v0, i64 0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ARGBASED-LABEL: 'vector_insert_extract' +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %extract_fixed_from_scalable = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %v0, i64 0) +; ARGBASED-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %insert_fixed_into_scalable = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %v0, <16 x i32> %v2, i64 0) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_scalable_from_scalable = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %v1, i64 0) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_scalable_into_scalable = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v1, <vscale x 4 x i32> %v0, i64 0) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; TYPEBASED-LABEL: 'vector_insert_extract' +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %extract_fixed_from_scalable = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %v0, i64 0) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %insert_fixed_into_scalable = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %v0, <16 x i32> %v2, i64 0) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %extract_scalable_from_scalable = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %v1, i64 0) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %insert_scalable_into_scalable = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v1, <vscale x 4 x i32> %v0, i64 0) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'vector_insert_extract' ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %extract_fixed_from_scalable = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %v0, i64 0) @@ -140,22 +148,39 @@ define void @vector_reverse() { } define void @vector_splice() { -; CHECK-LABEL: 'vector_splice' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 
for instruction: %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ARGBASED-LABEL: 'vector_splice' +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> 
zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> 
@llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; TYPEBASED-LABEL: 'vector_splice' +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv8i32 
= call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'vector_splice' ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll index 4bb4818..71746ca 100644 --- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll @@ -1873,155 
+1873,80 @@ define void @is.fpclass() { } define void @reverse() { -; ARGBASED-LABEL: 'reverse' -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> poison, <2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> poison, <4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> poison, <8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> poison, <16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.reverse.v2i8(<2 x i8> poison, <2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.reverse.v4i8(<4 x i8> poison, <4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call <8 x i8> @llvm.experimental.vp.reverse.v8i8(<8 x i8> poison, <8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> poison, <16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call <2 x i16> @llvm.experimental.vp.reverse.v2i16(<2 x i16> poison, <2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call <4 x i16> @llvm.experimental.vp.reverse.v4i16(<4 x i16> poison, <4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %11 = call <8 x i16> 
@llvm.experimental.vp.reverse.v8i16(<8 x i16> poison, <8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.reverse.v16i16(<16 x i16> poison, <16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.reverse.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.reverse.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.reverse.v16i32(<16 x i32> poison, <16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> poison, <2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.reverse.v4i64(<4 x i64> poison, <4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %19 = call <8 x i64> @llvm.experimental.vp.reverse.v8i64(<8 x i64> poison, <8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %20 = call <16 x i64> @llvm.experimental.vp.reverse.v16i64(<16 x i64> poison, <16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %21 = call <2 x bfloat> @llvm.experimental.vp.reverse.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call <4 x 
bfloat> @llvm.experimental.vp.reverse.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.reverse.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.reverse.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = call <2 x half> @llvm.experimental.vp.reverse.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = call <4 x half> @llvm.experimental.vp.reverse.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = call <8 x half> @llvm.experimental.vp.reverse.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %28 = call <16 x half> @llvm.experimental.vp.reverse.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %29 = call <2 x float> @llvm.experimental.vp.reverse.v2f32(<2 x float> poison, <2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> poison, <4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %31 = call <8 x float> @llvm.experimental.vp.reverse.v8f32(<8 x float> poison, <8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %32 = call <16 x float> @llvm.experimental.vp.reverse.v16f32(<16 x float> poison, <16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost 
of 3 for instruction: %33 = call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> poison, <2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %34 = call <4 x double> @llvm.experimental.vp.reverse.v4f64(<4 x double> poison, <4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %35 = call <8 x double> @llvm.experimental.vp.reverse.v8f64(<8 x double> poison, <8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %36 = call <16 x double> @llvm.experimental.vp.reverse.v16f64(<16 x double> poison, <16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %37 = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> poison, <vscale x 2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %38 = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> poison, <vscale x 4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %39 = call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> poison, <vscale x 8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %40 = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> poison, <vscale x 16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %41 = call <vscale x 2 x i8> @llvm.experimental.vp.reverse.nxv2i8(<vscale x 2 x i8> poison, <vscale x 2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %42 = call <vscale x 4 x i8> @llvm.experimental.vp.reverse.nxv4i8(<vscale x 4 x i8> poison, <vscale x 4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 
for instruction: %43 = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %44 = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %45 = call <vscale x 2 x i16> @llvm.experimental.vp.reverse.nxv2i16(<vscale x 2 x i16> poison, <vscale x 2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %46 = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %47 = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %48 = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %49 = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %50 = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %51 = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %52 = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> poison, 
<vscale x 16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %53 = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %54 = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %55 = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> poison, <vscale x 8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %56 = call <vscale x 16 x i64> @llvm.experimental.vp.reverse.nxv16i64(<vscale x 16 x i64> poison, <vscale x 16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %57 = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %58 = call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %59 = call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %60 = call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %61 = call <vscale x 2 x half> @llvm.experimental.vp.reverse.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an 
estimated cost of 6 for instruction: %62 = call <vscale x 4 x half> @llvm.experimental.vp.reverse.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %63 = call <vscale x 8 x half> @llvm.experimental.vp.reverse.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %64 = call <vscale x 16 x half> @llvm.experimental.vp.reverse.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %65 = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %66 = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %67 = call <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %68 = call <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %69 = call <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %70 = call <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %71 = call <vscale x 8 x double> 
@llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %72 = call <vscale x 16 x double> @llvm.experimental.vp.reverse.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; TYPEBASED-LABEL: 'reverse' -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> poison, <2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %2 = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> poison, <4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %3 = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> poison, <8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %4 = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> poison, <16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.reverse.v2i8(<2 x i8> poison, <2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.reverse.v4i8(<4 x i8> poison, <4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %7 = call <8 x i8> @llvm.experimental.vp.reverse.v8i8(<8 x i8> poison, <8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> poison, <16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call <2 x i16> 
@llvm.experimental.vp.reverse.v2i16(<2 x i16> poison, <2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %10 = call <4 x i16> @llvm.experimental.vp.reverse.v4i16(<4 x i16> poison, <4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %11 = call <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16> poison, <8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.reverse.v16i16(<16 x i16> poison, <16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.reverse.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.reverse.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.reverse.v16i32(<16 x i32> poison, <16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> poison, <2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.reverse.v4i64(<4 x i64> poison, <4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %19 = call <8 x i64> @llvm.experimental.vp.reverse.v8i64(<8 x i64> poison, <8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %20 = call 
<16 x i64> @llvm.experimental.vp.reverse.v16i64(<16 x i64> poison, <16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <2 x bfloat> @llvm.experimental.vp.reverse.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %22 = call <4 x bfloat> @llvm.experimental.vp.reverse.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.reverse.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.reverse.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call <2 x half> @llvm.experimental.vp.reverse.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %26 = call <4 x half> @llvm.experimental.vp.reverse.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %27 = call <8 x half> @llvm.experimental.vp.reverse.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %28 = call <16 x half> @llvm.experimental.vp.reverse.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %29 = call <2 x float> @llvm.experimental.vp.reverse.v2f32(<2 x float> poison, <2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %30 = call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> poison, <4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: 
Found an estimated cost of 38 for instruction: %31 = call <8 x float> @llvm.experimental.vp.reverse.v8f32(<8 x float> poison, <8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %32 = call <16 x float> @llvm.experimental.vp.reverse.v16f32(<16 x float> poison, <16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %33 = call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> poison, <2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %34 = call <4 x double> @llvm.experimental.vp.reverse.v4f64(<4 x double> poison, <4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %35 = call <8 x double> @llvm.experimental.vp.reverse.v8f64(<8 x double> poison, <8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %36 = call <16 x double> @llvm.experimental.vp.reverse.v16f64(<16 x double> poison, <16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %37 = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> poison, <vscale x 2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %38 = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> poison, <vscale x 4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %39 = call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> poison, <vscale x 8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %40 = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> poison, <vscale x 16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %41 = call <vscale x 2 x i8> 
@llvm.experimental.vp.reverse.nxv2i8(<vscale x 2 x i8> poison, <vscale x 2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %42 = call <vscale x 4 x i8> @llvm.experimental.vp.reverse.nxv4i8(<vscale x 4 x i8> poison, <vscale x 4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %43 = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %44 = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %45 = call <vscale x 2 x i16> @llvm.experimental.vp.reverse.nxv2i16(<vscale x 2 x i16> poison, <vscale x 2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %46 = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %47 = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %48 = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %49 = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %50 = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %51 = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> 
poison, <vscale x 8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %52 = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> poison, <vscale x 16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %53 = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %54 = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %55 = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> poison, <vscale x 8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %56 = call <vscale x 16 x i64> @llvm.experimental.vp.reverse.nxv16i64(<vscale x 16 x i64> poison, <vscale x 16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %57 = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %58 = call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %59 = call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %60 = call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %61 = call <vscale x 2 x half> @llvm.experimental.vp.reverse.nxv2f16(<vscale x 2 x half> poison, <vscale x 
2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %62 = call <vscale x 4 x half> @llvm.experimental.vp.reverse.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %63 = call <vscale x 8 x half> @llvm.experimental.vp.reverse.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %64 = call <vscale x 16 x half> @llvm.experimental.vp.reverse.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %65 = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %66 = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %67 = call <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %68 = call <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %69 = call <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %70 = call <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %71 = call <vscale x 8 x double> @llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x i1> 
poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %72 = call <vscale x 16 x double> @llvm.experimental.vp.reverse.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-LABEL: 'reverse' +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.reverse.v2i8(<2 x i8> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.reverse.v4i8(<4 x i8> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call <8 x i8> @llvm.experimental.vp.reverse.v8i8(<8 x i8> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call <2 x i16> @llvm.experimental.vp.reverse.v2i16(<2 x i16> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call <4 x i16> 
@llvm.experimental.vp.reverse.v4i16(<4 x i16> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %11 = call <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.reverse.v16i16(<16 x i16> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.reverse.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.reverse.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.reverse.v16i32(<16 x i32> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.reverse.v4i64(<4 x i64> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %19 = call <8 x i64> @llvm.experimental.vp.reverse.v8i64(<8 x i64> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %20 = call <16 x i64> @llvm.experimental.vp.reverse.v16i64(<16 x i64> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %21 = call <2 x bfloat> 
@llvm.experimental.vp.reverse.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call <4 x bfloat> @llvm.experimental.vp.reverse.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.reverse.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.reverse.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = call <2 x half> @llvm.experimental.vp.reverse.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = call <4 x half> @llvm.experimental.vp.reverse.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = call <8 x half> @llvm.experimental.vp.reverse.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %28 = call <16 x half> @llvm.experimental.vp.reverse.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %29 = call <2 x float> @llvm.experimental.vp.reverse.v2f32(<2 x float> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %31 = call <8 x float> @llvm.experimental.vp.reverse.v8f32(<8 x float> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %32 = call <16 x 
float> @llvm.experimental.vp.reverse.v16f32(<16 x float> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %34 = call <4 x double> @llvm.experimental.vp.reverse.v4f64(<4 x double> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %35 = call <8 x double> @llvm.experimental.vp.reverse.v8f64(<8 x double> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %36 = call <16 x double> @llvm.experimental.vp.reverse.v16f64(<16 x double> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %37 = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %38 = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %39 = call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %40 = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %41 = call <vscale x 2 x i8> @llvm.experimental.vp.reverse.nxv2i8(<vscale x 2 x i8> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %42 = call <vscale x 4 x i8> 
@llvm.experimental.vp.reverse.nxv4i8(<vscale x 4 x i8> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %43 = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %44 = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %45 = call <vscale x 2 x i16> @llvm.experimental.vp.reverse.nxv2i16(<vscale x 2 x i16> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %46 = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %47 = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %48 = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %49 = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %50 = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %51 = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 13 for instruction: %52 = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %53 = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %54 = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %55 = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %56 = call <vscale x 16 x i64> @llvm.experimental.vp.reverse.nxv16i64(<vscale x 16 x i64> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %57 = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %58 = call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %59 = call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %60 = call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %61 = call <vscale x 2 x half> 
@llvm.experimental.vp.reverse.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %62 = call <vscale x 4 x half> @llvm.experimental.vp.reverse.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %63 = call <vscale x 8 x half> @llvm.experimental.vp.reverse.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %64 = call <vscale x 16 x half> @llvm.experimental.vp.reverse.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %65 = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %66 = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %67 = call <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %68 = call <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %69 = call <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %70 = call <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 
poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %71 = call <vscale x 8 x double> @llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %72 = call <vscale x 16 x double> @llvm.experimental.vp.reverse.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> poison, <2 x i1> poison, i32 poison) call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> poison, <4 x i1> poison, i32 poison) diff --git a/llvm/test/Analysis/CostModel/X86/fshl.ll b/llvm/test/Analysis/CostModel/X86/fshl.ll index 485e7f6..c53a507 100644 --- a/llvm/test/Analysis/CostModel/X86/fshl.ll +++ b/llvm/test/Analysis/CostModel/X86/fshl.ll @@ -414,8 +414,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX1-LABEL: 'splatvar_funnel_i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:13 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX1-NEXT: Cost Model: Found costs of 
RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -423,8 +423,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX2-LABEL: 'splatvar_funnel_i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:11 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:14 SizeLat:14 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:14 Lat:28 SizeLat:28 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -432,8 +432,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX512F-LABEL: 'splatvar_funnel_i64' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found 
costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -441,8 +441,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX512BW-LABEL: 'splatvar_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: 
Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -450,8 +450,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX512DQ-LABEL: 'splatvar_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -459,8 +459,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> 
%a256, <8 ; ; AVX512VBMI2-LABEL: 'splatvar_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -486,8 +486,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; XOP-LABEL: 'splatvar_funnel_i64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 
SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -495,8 +495,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX512GFNI-LABEL: 'splatvar_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -532,8 +532,8 @@ define void 
@splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX1-LABEL: 'splatvar_funnel_i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -541,8 +541,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX2-LABEL: 'splatvar_funnel_i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x 
i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -550,8 +550,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX512F-LABEL: 'splatvar_funnel_i32' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512F-NEXT: 
Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -559,8 +559,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX512BW-LABEL: 'splatvar_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -568,8 +568,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX512DQ-LABEL: 'splatvar_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; 
AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -577,8 +577,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX512VBMI2-LABEL: 'splatvar_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> 
@llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -604,8 +604,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; XOP-LABEL: 'splatvar_funnel_i32' ; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -613,8 +613,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX512GFNI-LABEL: 'splatvar_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> 
undef, <4 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -631,108 +631,108 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) { ; SSSE3-LABEL: 'splatvar_funnel_i16' -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x 
i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:10 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:20 CodeSize:17 Lat:19 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:40 CodeSize:33 Lat:37 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i16' -; SSE42-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 
SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; SSE42-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i16' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 
x i16> %b512, <32 x i16> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i16' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i16' -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; 
AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_funnel_i16' -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 
x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_funnel_i16' -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) 
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_funnel_i16' -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; 
SLM-LABEL: 'splatvar_funnel_i16' -; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:7 Lat:12 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:13 Lat:23 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:25 Lat:45 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_funnel_i16' -; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; GLM-NEXT: Cost Model: 
Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_funnel_i16' -; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> 
zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_funnel_i16' -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> 
@llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) @@ -749,108 +749,108 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { ; SSSE3-LABEL: 'splatvar_funnel_i8' -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:18 Lat:29 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:35 Lat:57 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:96 CodeSize:69 Lat:113 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i8' -; SSE42-NEXT: Cost 
Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; SSE42-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i8' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 
for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:16 Lat:18 SizeLat:22 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:36 Lat:26 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:54 CodeSize:72 Lat:52 SizeLat:94 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i8' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of 
RThru:12 CodeSize:15 Lat:20 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:21 SizeLat:27 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:42 SizeLat:54 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i8' -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> 
@llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_funnel_i8' -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_funnel_i8' -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x 
i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_funnel_i8' -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found 
costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_funnel_i8' -; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:26 CodeSize:16 Lat:31 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> 
%a128, <16 x i8> %b128, <16 x i8> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:52 CodeSize:31 Lat:61 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:104 CodeSize:61 Lat:121 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_funnel_i8' -; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 
for: ret void ; ; XOP-LABEL: 'splatvar_funnel_i8' -; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:9 CodeSize:7 Lat:12 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:23 CodeSize:25 Lat:23 SizeLat:31 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:46 CodeSize:50 Lat:46 SizeLat:62 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_funnel_i8' -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; 
AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) @@ -1934,8 +1934,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX1-LABEL: 'splatvar_rotate_i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I64 = 
call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:18 SizeLat:22 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:30 Lat:36 SizeLat:44 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) @@ -1943,8 +1943,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX2-LABEL: 'splatvar_rotate_i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:11 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:22 SizeLat:20 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) @@ -1952,8 +1952,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX512-LABEL: 'splatvar_rotate_i64' ; AVX512-NEXT: Cost Model: Found costs of 1 
for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) @@ -1979,8 +1979,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; XOP-LABEL: 'splatvar_rotate_i64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:1 
CodeSize:1 Lat:3 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) @@ -2007,8 +2007,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX1-LABEL: 'splatvar_rotate_i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) @@ -2016,8 +2016,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX2-LABEL: 
'splatvar_rotate_i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) @@ -2025,8 +2025,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX512-LABEL: 'splatvar_rotate_i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost 
Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) @@ -2052,8 +2052,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; XOP-LABEL: 'splatvar_rotate_i32' ; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) @@ -2070,99 +2070,99 @@ define void 
@splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) { ; SSE-LABEL: 'splatvar_rotate_i16' -; SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; SSE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; SSE-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i16' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 3 for: %u256 = 
shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i16' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 
SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_rotate_i16' -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 
x i16> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_rotate_i16' -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:7 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512BW-NEXT: Cost Model: 
Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_rotate_i16' -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_rotate_i16' -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; 
AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_rotate_i16' -; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of 
RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_rotate_i16' -; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, 
<16 x i16> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_rotate_i16' -; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_rotate_i16' -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> 
zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) @@ -2179,99 +2179,99 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { ; SSE-LABEL: 'splatvar_rotate_i8' -; SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 
SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; SSE-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; SSE-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i8' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of 
RThru:11 CodeSize:14 Lat:15 SizeLat:19 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:28 Lat:21 SizeLat:38 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:40 CodeSize:56 Lat:42 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i8' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:23 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:32 Lat:36 SizeLat:46 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x 
i8> %a512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_rotate_i8' -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_rotate_i8' -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer 
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:11 Lat:6 SizeLat:12 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_rotate_i8' -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> 
%c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_rotate_i8' -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> 
%u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:11 Lat:6 SizeLat:12 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_rotate_i8' -; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; 
GLM-LABEL: 'splatvar_rotate_i8' -; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_rotate_i8' -; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:1 
CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_rotate_i8' -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> 
zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:11 Lat:6 SizeLat:12 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) diff --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll index af9a91b..1990605 100644 --- a/llvm/test/Analysis/CostModel/X86/fshr.ll +++ b/llvm/test/Analysis/CostModel/X86/fshr.ll @@ -414,8 +414,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX1-LABEL: 'splatvar_funnel_i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:13 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX1-NEXT: Cost 
Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -423,8 +423,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX2-LABEL: 'splatvar_funnel_i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:11 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:14 SizeLat:14 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:14 Lat:28 SizeLat:28 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -432,8 +432,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX512F-LABEL: 'splatvar_funnel_i64' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 
= shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -441,8 +441,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX512BW-LABEL: 'splatvar_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; 
AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -450,8 +450,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX512DQ-LABEL: 'splatvar_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -459,8 +459,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX512VBMI2-LABEL: 'splatvar_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, 
<2 x i64> undef, <2 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -486,8 +486,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; XOP-LABEL: 'splatvar_funnel_i64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 
Lat:12 SizeLat:10 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -495,8 +495,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX512GFNI-LABEL: 'splatvar_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) @@ -532,8 +532,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX1-LABEL: 'splatvar_funnel_i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 
x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -541,8 +541,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX2-LABEL: 'splatvar_funnel_i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 
x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -550,8 +550,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX512F-LABEL: 'splatvar_funnel_i32' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -559,8 +559,8 
@@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX512BW-LABEL: 'splatvar_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -568,8 +568,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX512DQ-LABEL: 'splatvar_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of 
RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -577,8 +577,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX512VBMI2-LABEL: 'splatvar_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> 
%b256, <8 x i32> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -604,8 +604,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; XOP-LABEL: 'splatvar_funnel_i32' ; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -613,8 +613,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX512GFNI-LABEL: 'splatvar_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost 
Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) @@ -631,108 +631,108 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) { ; SSSE3-LABEL: 'splatvar_funnel_i16' -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; 
SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:10 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:20 CodeSize:17 Lat:19 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:40 CodeSize:33 Lat:37 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i16' -; SSE42-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: 
%V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; SSE42-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i16' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i16' -; AVX2-NEXT: Cost Model: Found 
costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i16' -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 
SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_funnel_i16' -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, 
<32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_funnel_i16' -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512DQ-NEXT: 
Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_funnel_i16' -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_funnel_i16' -; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: 
%u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:7 Lat:12 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:13 Lat:23 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:25 Lat:45 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_funnel_i16' -; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 
for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_funnel_i16' -; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; XOP-NEXT: Cost Model: 
Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_funnel_i16' -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) @@ -749,108 +749,108 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 define void @splatvar_funnel_i8(i8 %a8, <16 x i8> 
%a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { ; SSSE3-LABEL: 'splatvar_funnel_i8' -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:18 Lat:29 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:35 Lat:57 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; SSSE3-NEXT: Cost Model: Found costs of RThru:96 CodeSize:69 Lat:113 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i8' -; SSE42-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> 
zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; SSE42-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i8' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 
x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:16 Lat:18 SizeLat:22 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:36 Lat:26 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:54 CodeSize:72 Lat:52 SizeLat:94 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i8' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:20 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:21 SizeLat:27 for: %V32I8 = call 
<32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:42 SizeLat:54 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i8' -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_funnel_i8' -; AVX512BW-NEXT: Cost Model: Found costs 
of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_funnel_i8' -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 
CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_funnel_i8' -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x 
i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_funnel_i8' -; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:26 CodeSize:16 Lat:31 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:52 CodeSize:31 Lat:61 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; SLM-NEXT: Cost Model: Found 
costs of RThru:104 CodeSize:61 Lat:121 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_funnel_i8' -; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_funnel_i8' -; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector 
<32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:9 CodeSize:7 Lat:12 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:23 CodeSize:25 Lat:23 SizeLat:31 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:46 CodeSize:50 Lat:46 SizeLat:62 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_funnel_i8' -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 
for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) @@ -1934,8 +1934,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX1-LABEL: 'splatvar_rotate_i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:18 SizeLat:22 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> 
%u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:30 Lat:36 SizeLat:44 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) @@ -1943,8 +1943,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX2-LABEL: 'splatvar_rotate_i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:11 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:22 SizeLat:20 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) @@ -1952,8 +1952,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; AVX512-LABEL: 'splatvar_rotate_i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found 
costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) @@ -1979,8 +1979,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; ; XOP-LABEL: 'splatvar_rotate_i64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x 
i64> %a256, <4 x i64> %a256, <4 x i64> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) @@ -2007,8 +2007,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX1-LABEL: 'splatvar_rotate_i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) @@ -2016,8 +2016,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX2-LABEL: 'splatvar_rotate_i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> 
zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) @@ -2025,8 +2025,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; AVX512-LABEL: 'splatvar_rotate_i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> 
%a128, <4 x i32> %u128) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) @@ -2052,8 +2052,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; ; XOP-LABEL: 'splatvar_rotate_i32' ; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) @@ -2070,99 +2070,99 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) { ; SSE-LABEL: 'splatvar_rotate_i16' -; 
SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; SSE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; SSE-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i16' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 
Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i16' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; 
AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_rotate_i16' -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 
SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_rotate_i16' -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:7 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_rotate_i16' -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost 
Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_rotate_i16' -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> 
undef, <8 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_rotate_i16' -; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> 
%a128, <8 x i16> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_rotate_i16' -; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 
SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_rotate_i16' -; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_rotate_i16' -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x 
i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) @@ -2179,99 +2179,99 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { ; SSE-LABEL: 'splatvar_rotate_i8' -; SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; 
SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; SSE-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; SSE-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i8' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:14 Lat:15 SizeLat:19 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:28 Lat:21 SizeLat:38 for: %V32I8 = call <32 x i8> 
@llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX1-NEXT: Cost Model: Found costs of RThru:40 CodeSize:56 Lat:42 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i8' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:23 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:32 Lat:36 SizeLat:46 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'splatvar_rotate_i8' -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> 
%c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512F-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'splatvar_rotate_i8' -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = 
shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512DQ-LABEL: 'splatvar_rotate_i8' -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; 
AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI2-LABEL: 'splatvar_rotate_i8' -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512VBMI2-NEXT: Cost Model: Found 
costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SLM-LABEL: 'splatvar_rotate_i8' -; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; SLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; SLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; GLM-LABEL: 'splatvar_rotate_i8' -; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = 
shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; GLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; GLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'splatvar_rotate_i8' -; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector 
<32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512GFNI-LABEL: 'splatvar_rotate_i8' -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) ; AVX512GFNI-NEXT: Cost 
Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) diff --git a/llvm/test/Analysis/CostModel/X86/load-broadcast.ll b/llvm/test/Analysis/CostModel/X86/load-broadcast.ll index 4cd8e5b..ecf54ce 100644 --- a/llvm/test/Analysis/CostModel/X86/load-broadcast.ll +++ b/llvm/test/Analysis/CostModel/X86/load-broadcast.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX,AVX512 -; 
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX,AVX512 +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mcpu=slm | 
FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; vXf64 @@ -18,24 +18,24 @@ define <2 x double> @broadcast_load_v2f64_v2f64(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v2f64_v2f64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 +; SSE2-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v2f64_v2f64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 +; SSSE3-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:0 
CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst ; ; SSE42-LABEL: 'broadcast_load_v2f64_v2f64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 +; SSE42-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst ; ; AVX-LABEL: 'broadcast_load_v2f64_v2f64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 +; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst ; %load = load <2 x double>, ptr %src %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer @@ -44,24 +44,24 @@ define <2 x double> @broadcast_load_v2f64_v2f64(ptr %src) { define <4 x double> @broadcast_load_v2f64_v4f64(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v2f64_v4f64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %load = load <2 x double>, ptr %src, align 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 +; SSE2-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v2f64_v4f64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 +; SSSE3-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; ; SSE42-LABEL: 'broadcast_load_v2f64_v4f64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x 
double>, ptr %src, align 16 +; SSE42-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; ; AVX-LABEL: 'broadcast_load_v2f64_v4f64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 +; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; %load = load <2 x double>, ptr %src %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer @@ -70,24 +70,24 @@ define <4 x double> @broadcast_load_v2f64_v4f64(ptr %src) { define <8 x double> @broadcast_load_v2f64_v8f64(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v2f64_v8f64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 +; SSE2-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer 
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v2f64_v8f64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 +; SSSE3-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; ; SSE42-LABEL: 'broadcast_load_v2f64_v8f64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 +; SSE42-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; ; AVX-LABEL: 'broadcast_load_v2f64_v8f64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> 
zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16 +; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; %load = load <2 x double>, ptr %src %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer @@ -96,16 +96,16 @@ define <8 x double> @broadcast_load_v2f64_v8f64(ptr %src) { define <2 x double> @broadcast_load_f64_v2f64(ptr %src) { ; SSE-LABEL: 'broadcast_load_f64_v2f64' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8 -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <2 x double> poison, double %load, i32 0 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 +; SSE-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <2 x double> poison, double %load, i32 0 +; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst ; ; AVX-LABEL: 'broadcast_load_f64_v2f64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = 
insertelement <2 x double> poison, double %load, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 +; AVX-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <2 x double> poison, double %load, i32 0 +; AVX-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst ; %load = load double, ptr %src %insert = insertelement <2 x double> poison, double %load, i32 0 @@ -115,28 +115,28 @@ define <2 x double> @broadcast_load_f64_v2f64(ptr %src) { define <4 x double> @broadcast_load_f64_v4f64(ptr %src) { ; SSE-LABEL: 'broadcast_load_f64_v4f64' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8 -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <4 x double> poison, double %load, i32 0 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 +; SSE-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0 +; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 
SizeLat:1 for: ret <4 x double> %bcst ; ; AVX1-LABEL: 'broadcast_load_f64_v4f64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8 -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <4 x double> poison, double %load, i32 0 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 +; AVX1-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0 +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; ; AVX2-LABEL: 'broadcast_load_f64_v4f64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8 -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <4 x double> poison, double %load, i32 0 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 +; AVX2-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0 +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %bcst = shufflevector <4 x double> 
%insert, <4 x double> poison, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; ; AVX512-LABEL: 'broadcast_load_f64_v4f64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8 -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <4 x double> poison, double %load, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 +; AVX512-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0 +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst ; %load = load double, ptr %src %insert = insertelement <4 x double> poison, double %load, i32 0 @@ -146,28 +146,28 @@ define <4 x double> @broadcast_load_f64_v4f64(ptr %src) { define <8 x double> @broadcast_load_f64_v8f64(ptr %src) { ; SSE-LABEL: 'broadcast_load_f64_v8f64' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8 -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <8 x double> poison, double %load, i32 0 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x 
double> %bcst +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 +; SSE-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0 +; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; ; AVX1-LABEL: 'broadcast_load_f64_v8f64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8 -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <8 x double> poison, double %load, i32 0 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 +; AVX1-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0 +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; ; AVX2-LABEL: 'broadcast_load_f64_v8f64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8 -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <8 x double> poison, double %load, i32 0 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer 
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 +; AVX2-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0 +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; ; AVX512-LABEL: 'broadcast_load_f64_v8f64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8 -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <8 x double> poison, double %load, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8 +; AVX512-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0 +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst ; %load = load double, ptr %src %insert = insertelement <8 x double> poison, double %load, i32 0 @@ -181,14 +181,14 @@ define <8 x double> @broadcast_load_f64_v8f64(ptr %src) { define <4 x float> @broadcast_load_v4f32_v4f32(ptr %src) { ; SSE-LABEL: 'broadcast_load_v4f32_v4f32' -; SSE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %bcst +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16 +; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %bcst ; ; AVX-LABEL: 'broadcast_load_v4f32_v4f32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %bcst +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16 +; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %bcst ; %load = load <4 x float>, ptr %src %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer @@ -197,14 +197,14 @@ define <4 x float> @broadcast_load_v4f32_v4f32(ptr %src) { define <8 x float> @broadcast_load_v4f32_v8f32(ptr %src) { ; SSE-LABEL: 'broadcast_load_v4f32_v8f32' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x float> %load, 
<4 x float> poison, <8 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %bcst +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16 +; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x float> %bcst ; ; AVX-LABEL: 'broadcast_load_v4f32_v8f32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %bcst +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16 +; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x float> %bcst ; %load = load <4 x float>, ptr %src %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer @@ -213,14 +213,14 @@ define <8 x float> @broadcast_load_v4f32_v8f32(ptr %src) { define <16 x float> @broadcast_load_v4f32_v16f32(ptr %src) { ; SSE-LABEL: 'broadcast_load_v4f32_v16f32' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %bcst +; SSE-NEXT: Cost Model: Found costs of RThru:1 
CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16 +; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %bcst ; ; AVX-LABEL: 'broadcast_load_v4f32_v16f32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %bcst +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16 +; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %bcst ; %load = load <4 x float>, ptr %src %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer @@ -229,14 +229,14 @@ define <16 x float> @broadcast_load_v4f32_v16f32(ptr %src) { define <2 x i64> @broadcast_load_v2i64_v2i64(ptr %src) { ; SSE-LABEL: 'broadcast_load_v2i64_v2i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %bcst +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16 +; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> 
zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %bcst ; ; AVX-LABEL: 'broadcast_load_v2i64_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %bcst +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16 +; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %bcst ; %load = load <2 x i64>, ptr %src %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer @@ -249,14 +249,14 @@ define <2 x i64> @broadcast_load_v2i64_v2i64(ptr %src) { define <4 x i64> @broadcast_load_v2i64_v4i64(ptr %src) { ; SSE-LABEL: 'broadcast_load_v2i64_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %bcst +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16 +; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %bcst ; ; AVX-LABEL: 'broadcast_load_v2i64_v4i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%load = load <2 x i64>, ptr %src, align 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %bcst +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16 +; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %bcst ; %load = load <2 x i64>, ptr %src %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer @@ -265,14 +265,14 @@ define <4 x i64> @broadcast_load_v2i64_v4i64(ptr %src) { define <8 x i64> @broadcast_load_v2i64_v8i64(ptr %src) { ; SSE-LABEL: 'broadcast_load_v2i64_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %bcst +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16 +; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %bcst ; ; AVX-LABEL: 'broadcast_load_v2i64_v8i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret <8 x i64> %bcst +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16 +; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %bcst ; %load = load <2 x i64>, ptr %src %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer @@ -285,14 +285,14 @@ define <8 x i64> @broadcast_load_v2i64_v8i64(ptr %src) { define <4 x i32> @broadcast_load_v4i32_v4i32(ptr %src) { ; SSE-LABEL: 'broadcast_load_v4i32_v4i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %bcst +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16 +; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %bcst ; ; AVX-LABEL: 'broadcast_load_v4i32_v4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %bcst +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16 +; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = 
shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %bcst ; %load = load <4 x i32>, ptr %src %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer @@ -301,14 +301,14 @@ define <4 x i32> @broadcast_load_v4i32_v4i32(ptr %src) { define <8 x i32> @broadcast_load_v4i32_v8i32(ptr %src) { ; SSE-LABEL: 'broadcast_load_v4i32_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %bcst +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16 +; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %bcst ; ; AVX-LABEL: 'broadcast_load_v4i32_v8i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %bcst +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16 +; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %bcst ; %load = load <4 x i32>, ptr %src %bcst = shufflevector <4 
x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer @@ -317,14 +317,14 @@ define <8 x i32> @broadcast_load_v4i32_v8i32(ptr %src) { define <16 x i32> @broadcast_load_v4i32_v16i32(ptr %src) { ; SSE-LABEL: 'broadcast_load_v4i32_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %bcst +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16 +; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %bcst ; ; AVX-LABEL: 'broadcast_load_v4i32_v16i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %bcst +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16 +; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %bcst ; %load = load <4 x i32>, ptr %src %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer @@ -337,34 +337,34 @@ define <16 x i32> @broadcast_load_v4i32_v16i32(ptr %src) { define <8 x i16> @broadcast_load_v8i16_v8i16(ptr %src) { ; SSE2-LABEL: 
'broadcast_load_v8i16_v8i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v8i16_v8i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst ; ; SSE42-LABEL: 'broadcast_load_v8i16_v8i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst +; 
SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst ; ; AVX1-LABEL: 'broadcast_load_v8i16_v8i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst ; ; AVX2-LABEL: 'broadcast_load_v8i16_v8i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst ; ; AVX512-LABEL: 'broadcast_load_v8i16_v8i16' -; 
AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst ; %load = load <8 x i16>, ptr %src %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer @@ -373,34 +373,34 @@ define <8 x i16> @broadcast_load_v8i16_v8i16(ptr %src) { define <16 x i16> @broadcast_load_v8i16_v16i16(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v8i16_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v8i16_v16i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst ; ; SSE42-LABEL: 'broadcast_load_v8i16_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst ; ; AVX1-LABEL: 'broadcast_load_v8i16_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 
SizeLat:4 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst ; ; AVX2-LABEL: 'broadcast_load_v8i16_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst ; ; AVX512-LABEL: 'broadcast_load_v8i16_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst ; %load = load <8 x i16>, ptr %src %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer @@ -409,34 +409,34 @@ define <16 x i16> @broadcast_load_v8i16_v16i16(ptr %src) { define <32 x i16> 
@broadcast_load_v8i16_v32i16(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v8i16_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v8i16_v32i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst ; ; SSE42-LABEL: 'broadcast_load_v8i16_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst ; ; AVX1-LABEL: 'broadcast_load_v8i16_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst ; ; AVX2-LABEL: 'broadcast_load_v8i16_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 
SizeLat:1 for: ret <32 x i16> %bcst ; ; AVX512-LABEL: 'broadcast_load_v8i16_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16 +; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst ; %load = load <8 x i16>, ptr %src %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer @@ -449,34 +449,34 @@ define <32 x i16> @broadcast_load_v8i16_v32i16(ptr %src) { define <16 x i8> @broadcast_load_v16i8_v16i8(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v16i8_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v16i8_v16i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = 
load <16 x i8>, ptr %src, align 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst ; ; SSE42-LABEL: 'broadcast_load_v16i8_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst ; ; AVX1-LABEL: 'broadcast_load_v16i8_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr 
%src, align 16 +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst ; ; AVX2-LABEL: 'broadcast_load_v16i8_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst ; ; AVX512-LABEL: 'broadcast_load_v16i8_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst ; %load = load <16 x i8>, ptr %src %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer @@ -485,34 +485,34 @@ define <16 x i8> 
@broadcast_load_v16i8_v16i8(ptr %src) { define <32 x i8> @broadcast_load_v16i8_v32i8(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v16i8_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v16i8_v32i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst ; ; SSE42-LABEL: 'broadcast_load_v16i8_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x 
i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst ; ; AVX1-LABEL: 'broadcast_load_v16i8_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst ; ; AVX2-LABEL: 'broadcast_load_v16i8_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: 
Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst ; ; AVX512-LABEL: 'broadcast_load_v16i8_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst ; %load = load <16 x i8>, ptr %src %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer @@ -521,34 +521,34 @@ define <32 x i8> @broadcast_load_v16i8_v32i8(ptr %src) { define <64 x i8> @broadcast_load_v16i8_v64i8(ptr %src) { ; SSE2-LABEL: 'broadcast_load_v16i8_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst ; ; SSSE3-LABEL: 'broadcast_load_v16i8_v64i8' -; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst ; ; SSE42-LABEL: 'broadcast_load_v16i8_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst ; ; AVX1-LABEL: 'broadcast_load_v16i8_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 
SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst ; ; AVX2-LABEL: 'broadcast_load_v16i8_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst ; ; AVX512-LABEL: 'broadcast_load_v16i8_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16 +; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst ; %load = load <16 x i8>, ptr %src %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer diff --git 
a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll index 3031629..aea7cc8 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -2032,7 +2032,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; ; AVX1-LABEL: 'test_gather_16f32_const_mask2' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind ; AVX1-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) @@ -2040,7 +2040,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> 
poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind ; AVX2-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) @@ -2048,7 +2048,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' ; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 -; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind ; SKL-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:24 SizeLat:24 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) @@ -2056,7 +2056,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; ; AVX512-LABEL: 'test_gather_16f32_const_mask2' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 
x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind ; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) @@ -2091,7 +2091,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32 ; ; AVX1-LABEL: 'test_scatter_16i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind ; AVX1-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> ; AVX1-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) @@ -2099,7 +2099,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32 ; ; AVX2-LABEL: 'test_scatter_16i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 -; 
AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind ; AVX2-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> ; AVX2-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) @@ -2107,7 +2107,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32 ; ; SKL-LABEL: 'test_scatter_16i32' ; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 -; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind ; SKL-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> ; SKL-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) @@ -2115,7 +2115,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32 ; ; AVX512-LABEL: 'test_scatter_16i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = 
insertelement <16 x ptr> poison, ptr %base, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind ; AVX512-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> ; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll index 8fae37b3..742b5b2 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -2032,7 +2032,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; ; AVX1-LABEL: 'test_gather_16f32_const_mask2' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind 
; AVX1-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) @@ -2040,7 +2040,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind ; AVX2-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) @@ -2048,7 +2048,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' ; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 -; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x 
i64> ; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind ; SKL-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:24 SizeLat:24 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) @@ -2056,7 +2056,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; ; AVX512-LABEL: 'test_gather_16f32_const_mask2' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind ; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) @@ -2091,7 +2091,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32 ; ; AVX1-LABEL: 'test_scatter_16i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = 
shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind ; AVX1-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> ; AVX1-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) @@ -2099,7 +2099,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32 ; ; AVX2-LABEL: 'test_scatter_16i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind ; AVX2-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> ; AVX2-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) @@ -2107,7 +2107,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32 ; ; SKL-LABEL: 'test_scatter_16i32' ; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 -; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 
Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind ; SKL-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> ; SKL-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) @@ -2115,7 +2115,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32 ; ; AVX512-LABEL: 'test_scatter_16i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind ; AVX512-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> ; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll index a8ceebf..c4cf8b7 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll @@ -3,11 +3,11 @@ define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128, <16 x half> %src256, <32 x half> 
%src512) { ; CHECK-LABEL: 'test_vXf16' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll index 1a2d098..79e1622 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll +++ 
b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll @@ -25,20 +25,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; ; AVX1-LABEL: 'test_vXf64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = 
shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer @@ -56,20 +56,20 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; ; AVX1-LABEL: 'test_vXi64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer -; 
AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer @@ -89,22 +89,22 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x 
i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> 
undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer @@ -125,22 +125,22 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = 
shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer @@ -154,41 +154,41 @@ 
define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128 ; SSE2-LABEL: 'test_vXf16' ; SSE2-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXf16' ; SSSE3-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = 
shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXf16' ; SSE42-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; 
AVX2-LABEL: 'test_vXf16' ; AVX2-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf16' -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> 
zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer @@ -244,49 +244,49 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-LABEL: 'test_vXi16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; 
SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi16' -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 
CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi16' -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 1 for: 
%V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 3 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs 
of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 
= shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer @@ -302,54 +302,54 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found costs of 3 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: 
Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi8' -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi8' -; 
SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi8' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x 
i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi8' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> 
undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> 
%src64, <8 x i8> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer @@ -365,46 +365,46 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSE2-LABEL: 'test_vXi1' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer 
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found costs of 3 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi1' ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = 
shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi1' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 
CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi1' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi1' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V2 = 
shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi1' @@ -440,22 +440,22 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a ; AVX1-LABEL: 'test_upper_vXf32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> 
%b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_upper_vXf32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x 
i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_upper_vXf32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2> ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll index ccfb543..8bb9cbc 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll @@ -32,21 +32,21 @@ define void @shuffle_load() { ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 
Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 ; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 -; SSE-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 -; SSE-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 -; SSE-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 ; SSE-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 ; SSE-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> 
undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 -; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 -; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 -; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 ; SSE-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 @@ -66,11 +66,11 @@ define void @shuffle_load() { ; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8 ; SSE-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 
x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16 -; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32 -; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64 -; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8 ; SSE-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16 @@ -95,21 +95,21 @@ define void @shuffle_load() { ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 ; SSE2-NEXT: Cost 
Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 ; SSE2-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 ; SSE2-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of 
RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 ; SSE2-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 @@ -129,11 +129,11 @@ define void @shuffle_load() { ; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8 ; SSE2-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> 
zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8 ; SSE2-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16 @@ -158,21 +158,21 @@ define void @shuffle_load() { ; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 ; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 
= shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 -; SSE3-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 -; SSE3-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 -; SSE3-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 ; SSE3-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 ; SSE3-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 
for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 -; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 -; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 -; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 ; SSE3-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 @@ -192,11 +192,11 @@ define void @shuffle_load() { ; SSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8 ; SSE3-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: 
Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16 -; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32 -; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64 -; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8 ; SSE3-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16 @@ -221,21 +221,21 @@ define void @shuffle_load() { ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 ; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> 
undef, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 -; AVX-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 -; AVX-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 -; AVX-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 ; AVX-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 ; AVX-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 -; 
AVX-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 -; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 -; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 ; AVX-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 @@ -255,11 +255,11 @@ define void @shuffle_load() { ; AVX-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8 ; AVX-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load 
<8 x half>, ptr undef, align 16 -; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32 -; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64 -; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8 ; AVX-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16 @@ -284,21 +284,21 @@ define void @shuffle_load() { ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 ; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 
CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 -; AVX2-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 -; AVX2-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 -; AVX2-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 ; AVX2-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 ; AVX2-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 -; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector 
<8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 -; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 -; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 ; AVX2-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 @@ -318,11 +318,11 @@ define void @shuffle_load() { ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8 ; AVX2-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16 -; AVX2-NEXT: Cost Model: 
Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32 -; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64 -; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8 ; AVX2-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16 @@ -347,21 +347,21 @@ define void @shuffle_load() { ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 ; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: 
%ld_16xi8 = load <16 x i8>, ptr undef, align 16 -; AVX512-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 -; AVX512-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 -; AVX512-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 ; AVX512-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 ; AVX512-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 -; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x 
i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 -; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 -; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 ; AVX512-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 @@ -381,11 +381,11 @@ define void @shuffle_load() { ; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8 ; AVX512-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16 -; 
AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32 -; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64 -; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8 ; AVX512-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16 diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16.ll index 4affc7e..f47e1a3 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16.ll @@ -3,11 +3,11 @@ define void @test_vXf16(<2 x half> %src32, <4 x half> 
%src64, <8 x half> %src128, <16 x half> %src256, <32 x half> %src512) { ; CHECK-LABEL: 'test_vXf16' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:4 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, 
i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 0> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll index 1bcf2d5..aca0ba3 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll @@ -25,20 +25,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; ; AVX1-LABEL: 'test_vXf64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 2 
for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:6 SizeLat:2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:12 SizeLat:4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:6 SizeLat:2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:5 SizeLat:3 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0> @@ -56,20 +56,20 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; ; AVX1-LABEL: 'test_vXi64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0> 
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:6 SizeLat:2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:12 SizeLat:4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 
x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:6 SizeLat:2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:5 SizeLat:3 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0> @@ -89,22 +89,22 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:14 SizeLat:8 for: %V512 = shufflevector <16 x float> %src512, <16 x 
float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:14 SizeLat:8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0> ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 
4, i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0> @@ -125,22 +125,22 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; 
AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:14 SizeLat:8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:14 SizeLat:8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0> ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 
= shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0> @@ -154,65 +154,65 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-LABEL: 'test_vXi16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of 6 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 
29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:6 SizeLat:6 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:12 SizeLat:12 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi16' -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <32 x i16> 
%src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi16' -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> 
undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; 
AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:5 Lat:9 SizeLat:5 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:10 Lat:18 SizeLat:10 for: %V512 = shufflevector <32 
x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 
0> +; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:9 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:18 SizeLat:8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'test_vXi16' -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 
6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:9 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 
9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; 
AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0> @@ -227,71 +227,71 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE2-LABEL: 'test_vXi8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> ; SSE2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of 9 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of 9 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; 
SSE2-NEXT: Cost Model: Found costs of 18 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of 36 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:11 Lat:6 SizeLat:11 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:11 Lat:6 SizeLat:11 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:22 Lat:12 SizeLat:22 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:44 Lat:24 SizeLat:44 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> 
undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi8' -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, 
i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, 
i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi8' -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 
CodeSize:1 Lat:2 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi8' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector 
<2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 
for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:5 Lat:9 SizeLat:5 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:10 Lat:18 SizeLat:10 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi8' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 
0> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, 
i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:9 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:18 SizeLat:8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'test_vXi8' -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x 
i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:9 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, 
i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'test_vXi8' -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = 
shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:9 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:9 SizeLat:3 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI-LABEL: 'test_vXi8' -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> 
%src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, 
i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void @@ -320,22 +320,22 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a ; AVX1-LABEL: 'test_upper_vXf32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> +; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:14 SizeLat:8 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_upper_vXf32' ; 
AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> -; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16> +; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> +; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:14 SizeLat:8 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_upper_vXf32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2> ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 
25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16> +; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16.ll index 007aa59bc..21d5766 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16.ll @@ -3,15 +3,15 @@ define void @test_vXf16(<8 x half> %src128, <16 x half> %src256, <32 x half> %src512, <64 x half> %src1024) { ; CHECK-LABEL: 'test_vXf16' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; 
CHECK-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 
5, i32 4, i32 29, i32 2, i32 1, i32 0> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> - %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> - %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> - %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0> + %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> + %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, 
i32 0> + %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ret void } diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll index 3727d7b..11f14df 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll @@ -21,42 +21,42 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1> ; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0> +; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> 
undef, <16 x i32> <i32 15, i32 0, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 13, i32 3, i32 11, i32 1, i32 0> ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXf64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1> ; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 5 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 11 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 0, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 13, i32 3, i32 11, i32 1, i32 0> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXf64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1> ; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found 
costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 5 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 11 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 0, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 13, i32 3, i32 11, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 10 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 0, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 13, i32 3, i32 11, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; 
AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 0, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 13, i32 3, i32 11, i32 1, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1> %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> - %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> - %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %V512 = shufflevector <8 x double> 
%src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0> + %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 0, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 13, i32 3, i32 11, i32 1, i32 0> ret void } @@ -64,79 +64,93 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1> ; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0> ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1> ; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 5 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> 
undef, <2 x i32> <i32 1, i32 1> ; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 5 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = 
shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1> %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> - %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0> ret void } define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { -; SSE-LABEL: 'test_vXf32' -; SSE-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1> -; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1> +; SSE2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, 
<8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of 11 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1> +; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 11 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1> +; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; SSE42-NEXT: Cost Model: 
Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXf32' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> ; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, 
i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 9 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 
10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1> %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> - %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> ret void } @@ -144,48 +158,48 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0> -; SSE-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE-NEXT: Cost Model: 
Found costs of 10 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0> +; SSE-NEXT: Cost Model: Found costs of 11 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; SSE-NEXT: Cost Model: Found costs of 18 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi32' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <32 x 
i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; XOP-NEXT: Cost Model: Found costs of 26 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 
31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; AVX1-NEXT: Cost Model: Found costs of 26 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, 
i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 9 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; AVX2-NEXT: Cost Model: Found costs of 20 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V1024 = shufflevector 
<32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1> %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0> - %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0> - %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> - %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 
13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> + %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ret void } @@ -193,90 +207,90 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-LABEL: 'test_vXi16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1> -; SSE2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:5 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:24 Lat:24 SizeLat:24 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, 
i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:26 Lat:26 SizeLat:26 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:5 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:24 Lat:24 SizeLat:24 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; SSE2-NEXT: Cost Model: Found costs of RThru:42 CodeSize:56 Lat:56 SizeLat:56 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:72 CodeSize:94 Lat:94 SizeLat:94 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 
31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi16' -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, 
i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; SSSE3-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 39 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi16' -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> 
undef, <2 x i32> <i32 1, i32 1> ; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; 
SSE42-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 35 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi16' -; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> +; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1> -; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 8 for: 
%V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; XOP-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 63 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, 
i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 8 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 16 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 20 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 
38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; AVX1-NEXT: Cost Model: Found costs of 30 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 99 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 
CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 10 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, 
i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; AVX2-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 44 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'test_vXi16' -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1> -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 
6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 21 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; AVX512F-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of 84 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, 
i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1> -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, 
i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' 
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> +; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> ; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1> -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> 
<i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> +; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1> - %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0> - %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> - %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 
28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> - %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 
2, i32 1, i32 0> ret void } @@ -286,88 +300,88 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1> ; SSE2-NEXT: Cost Model: Found costs of 5 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1> ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:19 Lat:19 SizeLat:19 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of RThru:35 CodeSize:37 Lat:37 SizeLat:37 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:26 Lat:26 SizeLat:26 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 
22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:77 CodeSize:91 Lat:91 SizeLat:91 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi8' -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1> ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1> ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, 
i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 6 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi8' -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, 
i32 1> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> ; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1> ; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1> ; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 6 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 
9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi8' -; XOP-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> +; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; 
XOP-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi8' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 
1> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 8 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 8 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 30 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> 
undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi8' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, 
i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'test_vXi8' -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> 
<i32 3, i32 3, i32 1, i32 1> ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1> ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 
62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'test_vXi8' -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 8 for: %V512 = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI-LABEL: 'test_vXi8' -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> +; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> ; 
AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, 
i32 3, i32 2, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1> %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1> %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> - %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> - %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, 
i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0> ret void } diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll index 6d22546..c98efcd 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll @@ -25,20 +25,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; ; AVX1-LABEL: 'test_vXf64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <4 x double> 
%src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, 
<4 x i32> <i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1> @@ -56,20 +56,20 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; ; AVX1-LABEL: 'test_vXi64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found 
costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1> @@ -89,22 +89,22 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x 
float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 
3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1> ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1> @@ -125,22 +125,22 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: 
Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x 
i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1> @@ -154,41 +154,41 @@ define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128 ; SSE2-LABEL: 'test_vXf16' ; SSE2-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1> ; SSE2-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost 
Model: Found costs of 2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXf16' ; SSSE3-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1> ; SSSE3-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: 
Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXf16' ; SSE42-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1> ; SSE42-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; 
SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf16' ; AVX2-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1> ; AVX2-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, 
i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf16' -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 
3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX512-NEXT: 
Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1> @@ -244,49 +244,49 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-LABEL: 'test_vXi16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, 
i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi16' -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 
x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi16' -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: 
Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 3 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost 
Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs 
of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 
x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> 
<i32 1, i32 1> @@ -302,54 +302,54 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> ; SSE2-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> ; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found costs of 3 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found costs of 
RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi8' -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 
3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, 
i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi8' -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 
3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 
'test_vXi8' -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 
for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi8' -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> 
%src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 
x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX512-NEXT: Cost Model: 
Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 
3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1> @@ -365,46 +365,46 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSE2-LABEL: 'test_vXi1' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1> ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found costs of 3 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, 
i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi1' ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1> ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 
3> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 
3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi1' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1> ; SSE42-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, 
i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi1' ; AVX1-NEXT: Cost 
Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, 
i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi1' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, 
i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 
3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi1' @@ -440,22 +440,22 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a ; AVX1-LABEL: 'test_upper_vXf32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 6, i32 6, i32 6, i32 6> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11> -; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11> +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_upper_vXf32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 6, i32 6, i32 6, i32 6> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> 
%a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11> +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_upper_vXf32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3> ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 6, i32 6, i32 6, i32 6> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11> -; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> 
%b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll index 56afc01..3016316 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll @@ -22,41 +22,41 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0> ; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6> ; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15> -; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0> ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXf64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0> ; XOP-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x double> %src256, <4 
x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6> ; XOP-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15> -; XOP-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXf64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0> ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6> ; AVX1-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15> -; AVX1-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf64' ; AVX2-NEXT: Cost Model: 
Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0> ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6> ; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15> -; AVX2-NEXT: Cost Model: Found costs of 11 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15> -; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:3 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of 
RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0> %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6> %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15> - %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0> ret void } @@ -65,41 +65,41 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, ; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0> ; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6> ; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15> -; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x i64> %src1024, <16 
x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0> ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0> ; XOP-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6> ; XOP-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15> -; XOP-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0> ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6> ; AVX1-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15> -; AVX1-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 
6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0> ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6> ; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15> -; AVX2-NEXT: Cost Model: Found costs of 11 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 
7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15> -; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:3 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0> %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6> %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15> - %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0> ret void } @@ -107,129 +107,113 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE2-LABEL: 'test_vXf32' ; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0> ; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> -; SSE2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 
6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; SSE2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> +; SSE2-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of 22 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXf32' ; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0> ; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> -; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> 
%src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 22 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXf32' ; SSE42-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0> ; SSE42-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> -; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = 
shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 7 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 13 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 9 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 20 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXf32' ; XOP-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0> ; XOP-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> -; XOP-NEXT: Cost Model: 
Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; XOP-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 17 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> +; XOP-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 27 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0> ; AVX1-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> -; 
AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 17 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 27 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0> ; AVX2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> 
<i32 3, i32 6, i32 1, i32 5> -; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 13 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 21 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 
CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:3 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> ; AVX512-NEXT: Cost Model: 
Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0> %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> - %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> - %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> - %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> + %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> + %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> ret void } define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) { -; SSE2-LABEL: 'test_vXi32' -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, 
i32 0> -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> -; SSE2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; SSE2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void -; -; SSSE3-LABEL: 'test_vXi32' -; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> -; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, 
i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void -; -; SSE42-LABEL: 'test_vXi32' -; SSE42-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> -; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 13 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0> +; SSE-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> +; SSE-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> +; SSE-NEXT: Cost Model: Found costs of 10 for: %V512 = 
shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; SSE-NEXT: Cost Model: Found costs of 23 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi32' ; XOP-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0> ; XOP-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> -; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; XOP-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 17 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> +; XOP-NEXT: Cost Model: Found costs of 10 for: %V512 = 
shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 27 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0> ; AVX1-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 17 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 10 for: %V512 = 
shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 27 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0> ; AVX2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> -; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 13 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 8 for: %V512 = 
shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 21 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:3 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 
Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0> %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> - %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> - %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> - %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, 
i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> + %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0> ret void } @@ -238,89 +222,89 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0> ; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:11 Lat:11 SizeLat:11 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of RThru:21 CodeSize:27 Lat:27 SizeLat:27 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of RThru:30 CodeSize:36 Lat:36 SizeLat:36 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, 
i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:24 Lat:24 SizeLat:24 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:51 Lat:51 SizeLat:51 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:69 CodeSize:91 Lat:91 SizeLat:91 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi16' ; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> 
<i32 3, i32 0> ; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, 
i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 37 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi16' ; SSE42-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0> ; SSE42-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; SSE42-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 
5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 19 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 33 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 
SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi16' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 26 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; XOP-NEXT: 
Cost Model: Found costs of 30 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; XOP-NEXT: Cost Model: Found costs of 54 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi16' ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0> ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 30 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 
23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 42 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 42 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 90 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, 
i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi16' ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0> ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 20 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x 
i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 17 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 42 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'test_vXi16' ; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0> ; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found 
costs of 42 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 84 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of 42 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of 168 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, 
i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7> ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'test_vXi16' ; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0> ; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 
11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' ; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0> ; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: 
%V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 
127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0> %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> - %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> - %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> - %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0> + %V256 
= shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> + %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7> ret void } @@ -330,8 +314,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE2-NEXT: Cost Model: Found costs of 4 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; SSE2-NEXT: Cost Model: Found costs of 7 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> ; SSE2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:13 Lat:13 SizeLat:13 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:23 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, 
i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE2-NEXT: Cost Model: Found costs of RThru:37 CodeSize:41 Lat:41 SizeLat:41 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:42 Lat:42 SizeLat:42 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; SSE2-NEXT: Cost Model: Found costs of RThru:66 CodeSize:78 Lat:78 SizeLat:78 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 
CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi8' @@ -339,8 +323,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> ; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSSE3-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 12 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, 
i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; SSSE3-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi8' @@ -348,8 +332,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE42-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; SSE42-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> ; SSE42-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; SSE42-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> 
<i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 10 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; SSE42-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi8' @@ -357,8 +341,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 
6, i32 8, i32 9, i32 2, i32 1, i32 0> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; XOP-NEXT: Cost Model: Found costs of 13 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; XOP-NEXT: Cost Model: Found costs of 36 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, 
i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi8' @@ -366,8 +350,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX1-NEXT: Cost Model: Found costs of 23 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 
5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; AVX1-NEXT: Cost Model: Found costs of 54 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi8' @@ -375,8 +359,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, 
i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: Cost Model: Found costs of 11 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; AVX2-NEXT: Cost Model: Found costs of 23 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'test_vXi8' @@ -384,8 +368,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> 
%src32, <8 x i8> %src64, <16 x ; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> ; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512F-NEXT: Cost Model: Found costs of 42 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; AVX512F-NEXT: Cost Model: 
Found costs of 42 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0> ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'test_vXi8' @@ -393,8 +377,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX512BW-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; AVX512BW-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> ; AVX512BW-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512BW-NEXT: Cost Model: Found costs of 19 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, 
i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; AVX512BW-NEXT: Cost Model: Found costs of 19 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI-LABEL: 'test_vXi8' @@ -402,15 +386,15 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> ; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> ; AVX512VBMI-NEXT: Cost Model: Found 
costs of 2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> +; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 
30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> <i32 3, i32 0> %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5> %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0> %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> - %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0> - %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 
11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0> ret void } diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll index bd642791..c53da90 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll @@ -531,31 +531,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:7 SizeLat:13 for: %shift = ashr <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; AVX2-NEXT: 
Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:9 for: %shift = ashr <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:7 SizeLat:13 for: %shift = ashr <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:9 for: %shift = ashr <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> 
zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = ashr <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; @@ -574,31 +574,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:14 SizeLat:26 for: %shift = ashr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:18 for: %shift = ashr <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found 
costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:14 SizeLat:26 for: %shift = ashr <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:18 for: %shift = ashr <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = ashr <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; @@ -660,31 +660,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: 
Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost 
Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; @@ -703,31 +703,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 
CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <16 x i32> %a, %splat ; 
XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; @@ -740,43 +740,43 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 
x i16> poison, <8 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> 
poison, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i16' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; @@ -789,43 +789,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x 
i16> poison, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = ashr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = ashr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> 
%insert, <16 x i16> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found 
costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; @@ -838,67 +838,67 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = ashr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = ashr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; 
; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 
SizeLat:1 for: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v32i16' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = ashr <32 x i16> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v32i16' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat 
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v32i16' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = ashr <32 x i16> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v32i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: 
Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; @@ -911,67 +911,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:15 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:15 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: 
Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:9 Lat:6 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %shift = ashr <16 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: 
Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v16i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v16i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v16i8' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> 
poison, <16 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v16i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; @@ -984,67 +984,67 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 
'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:18 Lat:30 SizeLat:26 for: %shift = ashr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:22 CodeSize:18 Lat:30 SizeLat:26 for: %shift = ashr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:16 Lat:11 SizeLat:21 for: %shift = ashr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX2-LABEL: 
'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:7 SizeLat:6 for: %shift = ashr <32 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> 
%shift ; ; AVX512F-LABEL: 'splatvar_shift_v32i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v32i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v32i8' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of 
RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v32i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; @@ -1057,67 +1057,67 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> 
%insert, <64 x i8> poison, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:44 CodeSize:36 Lat:60 SizeLat:52 for: %shift = ashr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:44 CodeSize:36 Lat:60 SizeLat:52 for: %shift = ashr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:32 Lat:22 SizeLat:42 for: %shift = ashr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> 
%insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:18 SizeLat:32 for: %shift = ashr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:14 SizeLat:12 for: %shift = ashr <64 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:18 SizeLat:32 for: %shift = ashr <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v64i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 
SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:37 CodeSize:51 Lat:37 SizeLat:63 for: %shift = ashr <64 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:37 CodeSize:51 Lat:37 SizeLat:63 for: %shift = ashr <64 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; 
AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v64i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index fdf011e..7a06269 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -531,31 +531,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:7 SizeLat:13 for: %shift = ashr <4 x 
i64> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:9 for: %shift = ashr <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:7 SizeLat:13 for: %shift = ashr <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:9 for: %shift = ashr 
<4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = ashr <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; @@ -574,31 +574,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:14 SizeLat:26 for: %shift = ashr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost 
Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:18 for: %shift = ashr <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:14 SizeLat:26 for: %shift = ashr <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:18 for: %shift = ashr <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> 
zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = ashr <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; @@ -660,31 +660,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 
for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; @@ -703,31 +703,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 
x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> 
%insert, <16 x i32> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; @@ -740,43 +740,43 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost 
Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 
1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i16' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; @@ -789,43 +789,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x 
i16> %a, i16 %b) { define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = ashr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = ashr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <16 x i16> %a, %splat ; AVX1-NEXT: 
Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 
x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; @@ -838,67 +838,67 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = ashr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = 
shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = ashr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 
SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v32i16' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = ashr <32 x i16> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v32i16' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; 
AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v32i16' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = ashr <32 x i16> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v32i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: 
%splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; @@ -911,67 +911,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:15 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:15 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX1-LABEL: 
'splatvar_shift_v16i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:9 Lat:6 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %shift = ashr <16 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 
'splatvar_shift_v16i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v16i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v16i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret 
<16 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v16i8' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v16i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat ; 
AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; @@ -984,67 +984,67 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:18 Lat:30 SizeLat:26 for: %shift = ashr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:22 CodeSize:18 Lat:30 SizeLat:26 for: %shift = ashr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x 
i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:16 Lat:11 SizeLat:21 for: %shift = ashr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:7 SizeLat:6 for: %shift = ashr <32 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> 
undef, <32 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v32i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v32i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v32i8' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = 
shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v32i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; @@ -1057,67 +1057,67 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; SSE2-NEXT: 
Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:44 CodeSize:36 Lat:60 SizeLat:52 for: %shift = ashr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:44 CodeSize:36 Lat:60 SizeLat:52 for: %shift = ashr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:32 Lat:22 SizeLat:42 for: %shift = ashr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX2-NEXT: Cost 
Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:18 SizeLat:32 for: %shift = ashr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:14 SizeLat:12 for: %shift = ashr <64 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:18 SizeLat:32 for: %shift = ashr <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v64i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, 
i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:37 CodeSize:51 Lat:37 SizeLat:63 for: %shift = ashr <64 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:37 CodeSize:51 Lat:37 SizeLat:63 for: %shift = ashr <64 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found costs of 
1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v64i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll index 2f70a4b..ee7546f 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll @@ -527,31 +527,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; AVX1-NEXT: 
Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = lshr <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = lshr <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; 
XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; @@ -570,31 +570,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = lshr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: 
%splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = lshr <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = lshr <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = lshr <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: 
Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; @@ -656,31 +656,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: 
%insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; @@ -699,31 +699,31 @@ define <16 x i32> 
@splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost 
Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; @@ -736,43 +736,43 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> 
%insert, <8 x i16> poison, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x 
i16> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i16' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 
x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; @@ -785,43 +785,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = lshr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = lshr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x 
i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 
for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; @@ -834,67 +834,67 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = lshr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; 
SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = lshr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x 
i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v32i16' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = lshr <32 x i16> %a, %splat ; AVX512F-NEXT: Cost Model: 
Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v32i16' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v32i16' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = lshr <32 x i16> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 
CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v32i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; @@ -907,67 +907,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:5 Lat:13 SizeLat:9 for: %shift = lshr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: 
Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:5 Lat:13 SizeLat:9 for: %shift = lshr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:8 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: 
Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %shift = lshr <16 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v16i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v16i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> 
zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v16i8' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v16i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 
for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; @@ -980,43 +980,43 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:10 Lat:26 SizeLat:18 for: %shift = lshr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:10 Lat:26 SizeLat:18 for: %shift = lshr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX1-LABEL: 
'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:9 SizeLat:14 for: %shift = lshr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:7 SizeLat:6 for: %shift = lshr <32 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; 
XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v32i8' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; @@ -1029,67 +1029,67 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:20 
Lat:52 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:20 Lat:52 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:18 SizeLat:28 for: %shift = lshr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:14 
Lat:16 SizeLat:18 for: %shift = lshr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:14 SizeLat:12 for: %shift = lshr <64 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:14 Lat:16 SizeLat:18 for: %shift = lshr <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v64i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512F-NEXT: Cost 
Model: Found costs of RThru:15 CodeSize:30 Lat:19 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:30 Lat:19 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> 
%insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v64i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index 78fd9f33..a508c2a 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -527,31 +527,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = lshr <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; 
AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = lshr <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift 
; ; AVX512-LABEL: 'splatvar_shift_v4i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; @@ -570,31 +570,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = lshr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = lshr <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found 
costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = lshr <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = lshr <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <8 x i64> %a, %splat ; AVX512-NEXT: 
Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; @@ -656,31 +656,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 
CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; @@ -699,31 +699,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> 
%insert, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: 
%splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; @@ -736,43 +736,43 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = 
shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x 
i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i16' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; @@ -785,43 +785,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost 
Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = lshr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = lshr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: 
Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX512-LABEL: 
'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; @@ -834,67 +834,67 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = lshr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 
SizeLat:8 for: %shift = lshr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 
CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v32i16' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = lshr <32 x i16> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v32i16' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> 
zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v32i16' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = lshr <32 x i16> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v32i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 
Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; @@ -907,67 +907,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:5 Lat:13 SizeLat:9 for: %shift = lshr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:5 Lat:13 SizeLat:9 for: %shift = lshr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 1 for: 
%splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:8 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %shift = lshr <16 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: 
%splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v16i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v16i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v16i8' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; 
AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v16i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; @@ -980,43 +980,43 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 
%b) { define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:10 Lat:26 SizeLat:18 for: %shift = lshr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:10 Lat:26 SizeLat:18 for: %shift = lshr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:9 SizeLat:14 for: %shift = lshr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 
CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:7 SizeLat:6 for: %shift = lshr <32 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of 
RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v32i8' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; @@ -1029,67 +1029,67 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:20 Lat:52 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE42-NEXT: 
Cost Model: Found costs of RThru:36 CodeSize:20 Lat:52 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:18 SizeLat:28 for: %shift = lshr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:14 Lat:16 SizeLat:18 for: %shift = lshr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; XOPAVX1-NEXT: 
Cost Model: Found costs of RThru:12 CodeSize:10 Lat:14 SizeLat:12 for: %shift = lshr <64 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:14 Lat:16 SizeLat:18 for: %shift = lshr <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v64i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:30 Lat:19 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, 
<64 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:30 Lat:19 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v64i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 
SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll index adedca8..375dac5 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll @@ -567,31 +567,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = shl <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 
CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = shl <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: 
Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; @@ -610,31 +610,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = shl <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = shl <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 
Lat:14 SizeLat:12 for: %shift = shl <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = shl <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; @@ -696,31 +696,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x 
i32> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, 
<8 x i32> poison, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; @@ -739,31 +739,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost 
Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> 
poison, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; @@ -776,49 +776,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = 
insertelement <8 x i16> poison, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = 
insertelement <8 x i16> poison, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i16' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; SLM-LABEL: 'splatvar_shift_v8i16' ; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 -; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; @@ -831,49 +831,49 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { define <16 x 
i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of 
RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat ; 
XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; SLM-LABEL: 'splatvar_shift_v16i16' ; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 -; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; @@ -886,73 +886,73 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> 
%insert, <32 x i16> poison, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = 
shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v32i16' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found 
costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = shl <32 x i16> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v32i16' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v32i16' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = shl <32 x i16> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, 
<32 x i16> poison, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; SLM-LABEL: 'splatvar_shift_v32i16' ; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v32i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; @@ -965,49 +965,49 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 
'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:8 for: %shift = shl <16 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX2-LABEL: 
'splatvar_shift_v16i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %shift = shl <16 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; 
; AVX512-LABEL: 'splatvar_shift_v16i8' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v16i8' ; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; @@ -1020,49 +1020,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: 
%shift = shl <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:11 Lat:8 SizeLat:14 for: %shift = shl <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = 
shl <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %shift = shl <32 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v32i8' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 
SizeLat:8 for: %shift = shl <32 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v32i8' ; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; @@ -1075,73 +1075,73 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = 
shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:22 Lat:16 SizeLat:28 for: %shift = shl <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:14 SizeLat:16 for: %shift = shl <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat 
= shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %shift = shl <64 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:14 SizeLat:16 for: %shift = shl <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v64i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:27 Lat:19 SizeLat:33 for: %shift = shl <64 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of 
RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:27 Lat:19 SizeLat:33 for: %shift = shl <64 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v64i8' ; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> 
zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v64i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll index a3536ec..b2435be 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -567,31 +567,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = shl <4 x i64> %a, 
%splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = shl <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, 
%splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift ; @@ -610,31 +610,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = shl <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs 
of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = shl <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = shl <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = shl <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost 
Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift ; @@ -696,31 +696,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> 
%insert, <8 x i32> undef, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift ; @@ -739,31 +739,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: 
Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> 
zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift ; @@ -776,49 +776,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = 
insertelement <8 x i16> undef, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> 
undef, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i16' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; ; SLM-LABEL: 'splatvar_shift_v8i16' ; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x 
i16> undef, i16 %b, i32 0 -; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift ; @@ -831,49 +831,49 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 
for: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 
Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; ; SLM-LABEL: 'splatvar_shift_v16i16' ; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found costs of 
RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift ; @@ -886,73 +886,73 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer 
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> 
undef, <32 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v32i16' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = shl <32 x i16> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v32i16' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v32i16' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 
SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = shl <32 x i16> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; SLM-LABEL: 'splatvar_shift_v32i16' ; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v32i16' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; 
AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift ; @@ -965,49 +965,49 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, 
i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:8 for: %shift = shl <16 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %shift = shl <16 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, 
i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i8' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v16i8' ; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift ; @@ -1020,49 +1020,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 
%b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:11 Lat:8 SizeLat:14 for: %shift = shl <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; 
AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %shift = shl <32 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift 
; ; AVX512-LABEL: 'splatvar_shift_v32i8' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v32i8' ; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift ; @@ -1075,73 +1075,73 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: 
%shift = shl <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:22 Lat:16 SizeLat:28 for: %shift = shl <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:14 SizeLat:16 for: %shift = shl 
<64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' ; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %shift = shl <64 x i8> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:14 SizeLat:16 for: %shift = shl <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v64i8' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:27 Lat:19 
SizeLat:33 for: %shift = shl <64 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' ; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512VL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:27 Lat:19 SizeLat:33 for: %shift = shl <64 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; 
AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v64i8' ; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatvar_shift_v64i8' ; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat ; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift ; diff --git a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll index d330152..e0def90 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll @@ -113,7 +113,7 @@ define void @banerjee1(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: 
store i64 %2, ptr %B.addr.12, align 8 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i64 %2, ptr %B.addr.12, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8 -; CHECK-NEXT: da analyze - output [* *]! +; CHECK-NEXT: da analyze - confused! ; ; NORMALIZE-LABEL: 'banerjee1' ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 @@ -127,7 +127,7 @@ define void @banerjee1(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; NORMALIZE-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8 ; NORMALIZE-NEXT: da analyze - confused! ; NORMALIZE-NEXT: Src: store i64 %2, ptr %B.addr.12, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8 -; NORMALIZE-NEXT: da analyze - output [* *]! +; NORMALIZE-NEXT: da analyze - confused! ; ; DELIN-LABEL: 'banerjee1' ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 @@ -141,7 +141,7 @@ define void @banerjee1(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; DELIN-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8 ; DELIN-NEXT: da analyze - confused! ; DELIN-NEXT: Src: store i64 %2, ptr %B.addr.12, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8 -; DELIN-NEXT: da analyze - output [* *]! +; DELIN-NEXT: da analyze - confused! ; entry: %cmp4 = icmp sgt i64 %n, 0 diff --git a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll index 3e3426a..bf2268b 100644 --- a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll +++ b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll @@ -8,11 +8,11 @@ define float @bug41488_test1(float %f) { ; CHECK-LABEL: 'bug41488_test1' ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4 -; CHECK-NEXT: da analyze - input [*]! +; CHECK-NEXT: da analyze - confused! 
; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float %f, ptr %q, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store float %f, ptr %q, align 4 --> Dst: store float %f, ptr %q, align 4 -; CHECK-NEXT: da analyze - output [*]! +; CHECK-NEXT: da analyze - confused! ; entry: %g = alloca float, align 4 @@ -34,11 +34,11 @@ for.cond.cleanup: define void @bug41488_test2(i32 %n) { ; CHECK-LABEL: 'bug41488_test2' ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4 -; CHECK-NEXT: da analyze - input [*]! +; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float 0.000000e+00, ptr %q, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store float 0.000000e+00, ptr %q, align 4 --> Dst: store float 0.000000e+00, ptr %q, align 4 -; CHECK-NEXT: da analyze - output [*]! +; CHECK-NEXT: da analyze - confused! ; entry: %g = alloca float, align 4 @@ -68,7 +68,7 @@ define void @bug53942_foo(i32 noundef %n, ptr noalias nocapture noundef writeonl ; CHECK-NEXT: Src: %.pre = load double, ptr %B, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store double %.pre, ptr %arrayidx2, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8 -; CHECK-NEXT: da analyze - output [*]! +; CHECK-NEXT: da analyze - confused! ; entry: %cmp8 = icmp sgt i32 %n, 1 @@ -99,11 +99,11 @@ for.body: ; preds = %for.body.preheader, define void @bug53942_bar(i32 noundef %n, ptr noalias noundef %A, ptr noalias noundef %B) { ; CHECK-LABEL: 'bug53942_bar' ; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: %0 = load double, ptr %arrayidx, align 8 -; CHECK-NEXT: da analyze - input [*]! +; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8 ; CHECK-NEXT: da analyze - confused! 
; CHECK-NEXT: Src: store double %0, ptr %arrayidx8, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8 -; CHECK-NEXT: da analyze - output [*]! +; CHECK-NEXT: da analyze - confused! ; entry: br label %for.cond @@ -166,14 +166,14 @@ for.end: ; preds = %for.cond.cleanup ; (j % 2 == 0 ? A[i][j] : A[i][j+1]) = 1; ; } ; -; FIXME: There are loop-carried dependencies between the store instruction. For +; There are loop-carried dependencies between the store instruction. For ; example, the value of %ptr0 when (i, j) = (0, 1) is %A+8, which is the same ; as when (i, j) = (0, 2). define void @non_invariant_baseptr_with_identical_obj(ptr %A) { ; CHECK-LABEL: 'non_invariant_baseptr_with_identical_obj' ; CHECK-NEXT: Src: store i32 1, ptr %idx, align 4 --> Dst: store i32 1, ptr %idx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - confused! ; entry: br label %loop.i.header @@ -216,13 +216,13 @@ exit: ; Similar to the above case, but ptr0 is loop-invariant with respsect to the ; k-loop. ; -; FIXME: Same as the above case, there are loop-carried dependencies between -; the store. +; Same as the above case, there are loop-carried dependencies between the +; store. define void @non_invariant_baseptr_with_identical_obj2(ptr %A) { ; CHECK-LABEL: 'non_invariant_baseptr_with_identical_obj2' ; CHECK-NEXT: Src: store i32 1, ptr %idx, align 4 --> Dst: store i32 1, ptr %idx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - confused! ; entry: br label %loop.i.header @@ -259,3 +259,58 @@ loop.i.latch: exit: ret void } + +; Pseudo-code that is approximately semantically equivalent to the below IR: +; +; void f(int A[][32]) { +; for (int i = 0; i < 100; i++) +; for (int j = 0; j < 15; j++) { +; int offset = (j % 2 == 0) ? 1 : 0; +; A[i][2 * j + offset + 0] = 1; +; A[i][2 * j + offset + 1] = 1; +; } +; } +; +; There are loop-carried dependencies between the two stores. 
For example, +; A[0][2] is accessed from both the former one when (i, j) = (0, 1) and the +; latter one when (i, j) = (0, 0). +; +define void @non_invariant_baseptr_with_identical_obj3(ptr %A) { +; CHECK-LABEL: 'non_invariant_baseptr_with_identical_obj3' +; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx0, align 4 +; CHECK-NEXT: da analyze - confused! +; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4 +; CHECK-NEXT: da analyze - confused! +; CHECK-NEXT: Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4 +; CHECK-NEXT: da analyze - confused! +; +entry: + br label %loop.i.header + +loop.i.header: + %i = phi i32 [ 0, %entry ], [ %i.inc, %loop.i.latch ] + %A1 = getelementptr i32, ptr %A, i32 1 + br label %loop.j + +loop.j: + %j = phi i32 [ 0, %loop.i.header ], [ %j.inc, %loop.j ] + %ptr0 = phi ptr [ %A1, %loop.i.header ], [ %ptr1, %loop.j ] + %ptr1 = phi ptr [ %A, %loop.i.header ], [ %ptr0, %loop.j ] + %j2_0 = shl i32 %j, 1 + %j2_1 = add i32 %j2_0, 1 + %idx0 = getelementptr [32 x i32], ptr %ptr0, i32 %i, i32 %j2_0 + %idx1 = getelementptr [32 x i32], ptr %ptr0, i32 %i, i32 %j2_1 + store i32 1, ptr %idx0 + store i32 1, ptr %idx1 + %j.inc = add i32 %j, 1 + %cmp.j = icmp slt i32 %j.inc, 15 + br i1 %cmp.j, label %loop.j, label %loop.i.latch + +loop.i.latch: + %i.inc = add i32 %i, 1 + %cmp.i = icmp slt i32 %i.inc, 100 + br i1 %cmp.i, label %loop.i.header, label %exit + +exit: + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll b/llvm/test/Analysis/DependenceAnalysis/GCD.ll index c0e1362..03343e7 100644 --- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll +++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll @@ -398,7 +398,7 @@ define void @gcd6(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx9, align 4 --> Dst: store i32 %2, ptr %B.addr.12, align 4 ; CHECK-NEXT: da analyze - confused! 
; CHECK-NEXT: Src: store i32 %2, ptr %B.addr.12, align 4 --> Dst: store i32 %2, ptr %B.addr.12, align 4 -; CHECK-NEXT: da analyze - output [* *]! +; CHECK-NEXT: da analyze - confused! ; entry: %cmp4 = icmp sgt i64 %n, 0 @@ -475,7 +475,7 @@ define void @gcd7(i32 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i32 %11, ptr %B.addr.12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4 -; CHECK-NEXT: da analyze - output [* *]! +; CHECK-NEXT: da analyze - confused! ; entry: %0 = zext i32 %n to i64 @@ -566,7 +566,7 @@ define void @gcd8(i32 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: %5 = load i32, ptr %arrayidx12, align 4 --> Dst: store i32 %5, ptr %B.addr.12, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i32 %5, ptr %B.addr.12, align 4 --> Dst: store i32 %5, ptr %B.addr.12, align 4 -; CHECK-NEXT: da analyze - output [* *]! +; CHECK-NEXT: da analyze - confused! ; entry: %cmp4 = icmp sgt i32 %n, 0 @@ -650,7 +650,7 @@ define void @gcd9(i32 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i32 %11, ptr %B.addr.12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4 -; CHECK-NEXT: da analyze - output [* *]! +; CHECK-NEXT: da analyze - confused! 
; entry: %0 = zext i32 %n to i64 diff --git a/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll b/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll index d983bd4..3e110ac 100644 --- a/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll +++ b/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll @@ -12,7 +12,7 @@ define void @f(ptr %a, i32 %n, i1 %arg) align 2 { ; CHECK-NEXT: Src: %t.2 = load ptr, ptr %a, align 4 --> Dst: %t.4 = load i32, ptr %t.3, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %t.4 = load i32, ptr %t.3, align 4 --> Dst: %t.4 = load i32, ptr %t.3, align 4 -; CHECK-NEXT: da analyze - input [* *]! +; CHECK-NEXT: da analyze - confused! ; for.preheader: %t.0 = ashr exact i32 %n, 3 diff --git a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll index 4ab8777..8cb0e2a 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll @@ -69,7 +69,7 @@ define void @p2(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: %0 = load i64, ptr %arrayidx17, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i64 %0, ptr %B.addr.24, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8 -; CHECK-NEXT: da analyze - output [* * *]! +; CHECK-NEXT: da analyze - confused! 
; entry: %cmp10 = icmp sgt i64 %n, 0 diff --git a/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll index 4040187..e67cae7d 100644 --- a/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll +++ b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll @@ -28,7 +28,7 @@ define void @p2(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: %0 = load i64, ptr %arrayidx17, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i64 %0, ptr %B.addr.24, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8 -; CHECK-NEXT: da analyze - output [* * *]! +; CHECK-NEXT: da analyze - confused! ; ; LIN-LABEL: 'p2' ; LIN-NEXT: Src: store i64 %i.011, ptr %arrayidx8, align 8 --> Dst: store i64 %i.011, ptr %arrayidx8, align 8 @@ -42,7 +42,7 @@ define void @p2(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; LIN-NEXT: Src: %0 = load i64, ptr %arrayidx17, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8 ; LIN-NEXT: da analyze - confused! ; LIN-NEXT: Src: store i64 %0, ptr %B.addr.24, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8 -; LIN-NEXT: da analyze - output [* * *]! +; LIN-NEXT: da analyze - confused! ; entry: %cmp10 = icmp sgt i64 %n, 0 diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll index f64a748..8b9aa25 100644 --- a/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll @@ -437,7 +437,7 @@ define void @symbolicrdiv6(ptr %A, ptr %B, i64 %n1, i64 %n2) nounwind uwtable ss ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.12, align 4 ; CHECK-NEXT: da analyze - confused! 
; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.12, align 4 --> Dst: store i32 %0, ptr %B.addr.12, align 4 -; CHECK-NEXT: da analyze - output [* *]! +; CHECK-NEXT: da analyze - confused! ; entry: %cmp4 = icmp eq i64 %n1, 0 diff --git a/llvm/test/Analysis/LoopAccessAnalysis/runtime-check-known-true.ll b/llvm/test/Analysis/LoopAccessAnalysis/runtime-check-known-true.ll new file mode 100644 index 0000000..30c8088 --- /dev/null +++ b/llvm/test/Analysis/LoopAccessAnalysis/runtime-check-known-true.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s + +; TODO: Accesses are known completely before or after. +define void @test_runtime_check_known_false_after_construction(ptr %start.1, ptr %start.2, ptr %end) { +; CHECK-LABEL: 'test_runtime_check_known_false_after_construction' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %ptr.iv.1 = phi ptr [ %ptr.iv.1.next, %loop ], [ %start.1, %entry ] +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %ptr.iv.2 = phi ptr [ %ptr.iv.2.next, %loop ], [ %start.2.diff, %entry ] +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: ((-8 * ((2305843009213693951 * (8 + (-1 * (ptrtoint ptr %start.1 to i64)) + (ptrtoint ptr %end to i64))) /u 8)) + %start.1) High: (8 + %start.1)) +; CHECK-NEXT: Member: {%start.1,+,-8}<%loop> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: (-8 + (-8 * ((2305843009213693951 * (8 + (-1 * (ptrtoint ptr %start.1 to i64)) + (ptrtoint ptr %end to i64))) /u 8)) + (-1 * (ptrtoint ptr %start.2 to i64)) + (ptrtoint ptr %start.1 to i64) + %start.2) High: ((-1 * (ptrtoint ptr %start.2 to i64)) + (ptrtoint ptr %start.1 to i64) + %start.2)) +; CHECK-NEXT: Member: {(-8 + (-1 * 
(ptrtoint ptr %start.2 to i64)) + (ptrtoint ptr %start.1 to i64) + %start.2),+,-8}<%loop> +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Equal predicate: (zext i3 ((trunc i64 (ptrtoint ptr %end to i64) to i3) + (-1 * (trunc i64 (ptrtoint ptr %start.1 to i64) to i3))) to i64) == 0 +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %gep.start.2 = getelementptr i8, ptr %start.2, i64 8 + %start.1.int = ptrtoint ptr %start.1 to i64 + %start.2.int = ptrtoint ptr %gep.start.2 to i64 + %diff = sub i64 %start.1.int, %start.2.int + %start.2.diff = getelementptr i8, ptr %start.2, i64 %diff + br label %loop + +loop: + %ptr.iv.1 = phi ptr [ %ptr.iv.1.next, %loop ], [ %start.1, %entry ] + %ptr.iv.2 = phi ptr [ %ptr.iv.2.next, %loop ], [ %start.2.diff, %entry ] + %ptr.iv.2.next = getelementptr i8, ptr %ptr.iv.2, i64 -8 + %ptr.iv.1.next = getelementptr i8, ptr %ptr.iv.1, i64 -8 + %l = load i64, ptr %ptr.iv.2, align 8 + store i64 %l, ptr %ptr.iv.1, align 8 + %ec = icmp eq ptr %ptr.iv.2, %end + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Analysis/MemorySSA/pr39197.ll b/llvm/test/Analysis/MemorySSA/pr39197.ll index b52444f..af57b3c 100644 --- a/llvm/test/Analysis/MemorySSA/pr39197.ll +++ b/llvm/test/Analysis/MemorySSA/pr39197.ll @@ -8,6 +8,8 @@ target triple = "s390x-ibm-linux" @1 = internal global i64 9, align 8 @g_1042 = external dso_local global [5 x i16], align 2 +declare void @dummy() + ; CHECK-LABEL: @main() ; Function Attrs: nounwind define dso_local void @main() #0 { @@ -15,9 +17,6 @@ define dso_local void @main() #0 { unreachable } -; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 - ; Function Attrs: nounwind define dso_local void @func_1() #0 { %1 = alloca ptr, align 8 @@ -31,7 +30,7 @@ define dso_local void @func_1() #0 { %7 = load i64, ptr @1, align 8, !tbaa !5 %8 = and 
i64 %7, %6 store i64 %8, ptr @1, align 8, !tbaa !5 - call void @llvm.lifetime.end.p0(i64 4, ptr undef) #2 + call void @dummy() unreachable ; <label>:9: ; preds = %0 diff --git a/llvm/test/Analysis/MemorySSA/pr43044.ll b/llvm/test/Analysis/MemorySSA/pr43044.ll index f4e0ce9..bd767d3 100644 --- a/llvm/test/Analysis/MemorySSA/pr43044.ll +++ b/llvm/test/Analysis/MemorySSA/pr43044.ll @@ -47,6 +47,8 @@ cleanup1400.loopexit1: ; preds = %for.cond1050 br label %cleanup1400 cleanup1400: ; preds = %cleanup1400.loopexit1, %cleanup1400.loopexit.split - call void @llvm.lifetime.end.p0(i64 4, ptr nonnull undef) + call void @dummy() unreachable } + +declare void @dummy() diff --git a/llvm/test/Analysis/MemorySSA/renamephis.ll b/llvm/test/Analysis/MemorySSA/renamephis.ll index 0e8cf8b..e297b99 100644 --- a/llvm/test/Analysis/MemorySSA/renamephis.ll +++ b/llvm/test/Analysis/MemorySSA/renamephis.ll @@ -41,7 +41,7 @@ block.exit: ; preds = %cond.exit unreachable sw.bb94: ; preds = %cond.exit - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull undef) + call void @g() br label %cleanup cleanup: ; preds = %sw.bb94, %cond.exit, %cond.exit diff --git a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll index 1799d15..39b475d 100644 --- a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll +++ b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll @@ -21,28 +21,26 @@ define i32 @d(i32 %base) { ; CHECK-NEXT: Classifying expressions for: @d ; CHECK-NEXT: %e = alloca [1 x [1 x i8]], align 1 ; CHECK-NEXT: --> %e U: full-set S: full-set -; CHECK-NEXT: %0 = bitcast ptr %e to ptr -; CHECK-NEXT: --> %e U: full-set S: full-set ; CHECK-NEXT: %f.0 = phi i32 [ %base, %entry ], [ %inc, %for.cond ] ; CHECK-NEXT: --> {%base,+,1}<nsw><%for.cond> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Computable } ; CHECK-NEXT: %idxprom = sext i32 %f.0 to i64 ; 
CHECK-NEXT: --> {(sext i32 %base to i64),+,1}<nsw><%for.cond> U: [-2147483648,-9223372036854775808) S: [-2147483648,-9223372036854775808) Exits: <<Unknown>> LoopDispositions: { %for.cond: Computable } ; CHECK-NEXT: %arrayidx = getelementptr inbounds [1 x [1 x i8]], ptr %e, i64 0, i64 %idxprom ; CHECK-NEXT: --> {((sext i32 %base to i64) + %e),+,1}<nw><%for.cond> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Computable } -; CHECK-NEXT: %1 = load ptr, ptr @c, align 8 -; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant } -; CHECK-NEXT: %sub.ptr.lhs.cast = ptrtoint ptr %1 to i64 -; CHECK-NEXT: --> (ptrtoint ptr %1 to i64) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant } +; CHECK-NEXT: %load1 = load ptr, ptr @c, align 8 +; CHECK-NEXT: --> %load1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant } +; CHECK-NEXT: %sub.ptr.lhs.cast = ptrtoint ptr %load1 to i64 +; CHECK-NEXT: --> (ptrtoint ptr %load1 to i64) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, ptrtoint (ptr @b to i64) -; CHECK-NEXT: --> ((-1 * (ptrtoint ptr @b to i64)) + (ptrtoint ptr %1 to i64)) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant } +; CHECK-NEXT: --> ((-1 * (ptrtoint ptr @b to i64)) + (ptrtoint ptr %load1 to i64)) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 4 ; CHECK-NEXT: --> %sub.ptr.div U: [-2305843009213693952,2305843009213693952) S: [-2305843009213693952,2305843009213693952) Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %arrayidx1 = getelementptr inbounds [1 x i8], ptr %arrayidx, i64 0, i64 %sub.ptr.div ; CHECK-NEXT: --> ({((sext i32 %base to i64) + %e),+,1}<nw><%for.cond> + %sub.ptr.div) U: full-set S: 
full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant } -; CHECK-NEXT: %2 = load i8, ptr %arrayidx1, align 1 -; CHECK-NEXT: --> %2 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant } -; CHECK-NEXT: %conv = sext i8 %2 to i32 -; CHECK-NEXT: --> (sext i8 %2 to i32) U: [-128,128) S: [-128,128) Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant } +; CHECK-NEXT: %load2 = load i8, ptr %arrayidx1, align 1 +; CHECK-NEXT: --> %load2 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant } +; CHECK-NEXT: %conv = sext i8 %load2 to i32 +; CHECK-NEXT: --> (sext i8 %load2 to i32) U: [-128,128) S: [-128,128) Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %inc = add nsw i32 %f.0, 1 ; CHECK-NEXT: --> {(1 + %base),+,1}<nw><%for.cond> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Computable } ; CHECK-NEXT: Determining loop execution counts for: @d @@ -52,21 +50,20 @@ define i32 @d(i32 %base) { ; entry: %e = alloca [1 x [1 x i8]], align 1 - %0 = bitcast ptr %e to ptr - call void @llvm.lifetime.start.p0(i64 1, ptr %0) #2 + call void @llvm.lifetime.start.p0(i64 1, ptr %e) #2 br label %for.cond for.cond: ; preds = %for.cond, %entry %f.0 = phi i32 [ %base, %entry ], [ %inc, %for.cond ] %idxprom = sext i32 %f.0 to i64 %arrayidx = getelementptr inbounds [1 x [1 x i8]], ptr %e, i64 0, i64 %idxprom - %1 = load ptr, ptr @c, align 8 - %sub.ptr.lhs.cast = ptrtoint ptr %1 to i64 + %load1 = load ptr, ptr @c, align 8 + %sub.ptr.lhs.cast = ptrtoint ptr %load1 to i64 %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, ptrtoint (ptr @b to i64) %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 4 %arrayidx1 = getelementptr inbounds [1 x i8], ptr %arrayidx, i64 0, i64 %sub.ptr.div - %2 = load i8, ptr %arrayidx1, align 1 - %conv = sext i8 %2 to i32 + %load2 = load i8, ptr %arrayidx1, align 1 + %conv = sext i8 %load2 to i32 store i32 %conv, ptr @a, align 4 %inc = add nsw i32 %f.0, 1 br label 
%for.cond diff --git a/llvm/test/Analysis/ScalarEvolution/sdiv.ll b/llvm/test/Analysis/ScalarEvolution/sdiv.ll index e01f84f..9eaaf8b 100644 --- a/llvm/test/Analysis/ScalarEvolution/sdiv.ll +++ b/llvm/test/Analysis/ScalarEvolution/sdiv.ll @@ -38,7 +38,7 @@ define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 { entry: %storage = alloca [2 x i32], align 4 %0 = bitcast ptr %storage to ptr - call void @llvm.lifetime.start.p0(i64 8, ptr %0) #4 + call void @llvm.lifetime.start.p0(i64 8, ptr %storage) #4 call void @llvm.memset.p0.i64(ptr align 4 %0, i8 0, i64 8, i1 false) br label %for.cond @@ -48,7 +48,7 @@ for.cond: br i1 %cmp, label %for.body, label %for.cond.cleanup for.cond.cleanup: - call void @llvm.lifetime.end.p0(i64 8, ptr %0) #4 + call void @llvm.lifetime.end.p0(i64 8, ptr %storage) #4 ret void for.body: diff --git a/llvm/test/Analysis/ScalarEvolution/srem.ll b/llvm/test/Analysis/ScalarEvolution/srem.ll index ff898c9..377e58a 100644 --- a/llvm/test/Analysis/ScalarEvolution/srem.ll +++ b/llvm/test/Analysis/ScalarEvolution/srem.ll @@ -38,7 +38,7 @@ define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 { entry: %storage = alloca [2 x i32], align 4 %0 = bitcast ptr %storage to ptr - call void @llvm.lifetime.start.p0(i64 8, ptr %0) #4 + call void @llvm.lifetime.start.p0(i64 8, ptr %storage) #4 call void @llvm.memset.p0.i64(ptr align 4 %0, i8 0, i64 8, i1 false) br label %for.cond @@ -48,7 +48,7 @@ for.cond: br i1 %cmp, label %for.body, label %for.cond.cleanup for.cond.cleanup: - call void @llvm.lifetime.end.p0(i64 8, ptr %0) #4 + call void @llvm.lifetime.end.p0(i64 8, ptr %storage) #4 ret void for.body: |