diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2025-05-30 08:34:13 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-05-30 08:34:13 +0100 |
commit | 37edd2c1c32bf7599af28b96ae5adf909e96fc58 (patch) | |
tree | 7da287b248ad67ff76d816f31138f1f765b197ee | |
parent | 417e43ad43d706c8a932adf702a55de97e65fb37 (diff) | |
download | llvm-37edd2c1c32bf7599af28b96ae5adf909e96fc58.zip llvm-37edd2c1c32bf7599af28b96ae5adf909e96fc58.tar.gz llvm-37edd2c1c32bf7599af28b96ae5adf909e96fc58.tar.bz2 |
[X86] combineEXTRACT_SUBVECTOR - generalize extract_subvector(broadcast(x),c) fold with IsElementEquivalent (#141963)
Instead of matching the broadcast nodes directly, let IsElementEquivalent perform the check. IsElementEquivalent already handles BITCAST nodes, so the fold now covers those cases as well.
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 19 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll | 32 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll | 12 |
3 files changed, 22 insertions, 41 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ecadc86a..c5d92d5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -59549,6 +59549,7 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, unsigned SizeInBits = VT.getSizeInBits(); unsigned InSizeInBits = InVecVT.getSizeInBits(); unsigned NumSubElts = VT.getVectorNumElements(); + unsigned NumInElts = InVecVT.getVectorNumElements(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc DL(N); @@ -59615,22 +59616,22 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, } } - // If we're extracting an upper subvector from a broadcast we should just - // extract the lowest subvector instead which should allow + // If we're extracting an upper subvector see if we'd get the same elements if + // we extracted the lowest subvector instead which should allow // SimplifyDemandedVectorElts do more simplifications. - if (IdxVal != 0 && (InVec.getOpcode() == X86ISD::VBROADCAST || - InVec.getOpcode() == X86ISD::VBROADCAST_LOAD || - DAG.isSplatValue(InVec, /*AllowUndefs*/ false))) - return extractSubVector(InVec, 0, DAG, DL, SizeInBits); + if (IdxVal != 0) { + bool AllEquiv = all_of(seq<unsigned>(NumSubElts), [&](unsigned I) { + return IsElementEquivalent(NumInElts, InVec, InVec, I, I + IdxVal); + }); + if (AllEquiv) + return extractSubVector(InVec, 0, DAG, DL, SizeInBits); + } // Check if we're extracting a whole broadcasted subvector. if (InVec.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) { auto *MemIntr = cast<MemIntrinsicSDNode>(InVec); EVT MemVT = MemIntr->getMemoryVT(); if (MemVT == VT) { - // Just use the lowest subvector. - if (IdxVal != 0) - return extractSubVector(InVec, 0, DAG, DL, SizeInBits); // If this is the only use, we can replace with a regular load (this may // have been missed by SimplifyDemandedVectorElts due to extra uses of the // memory chain). 
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll index d6e6ad1..f2e4da0 100644 --- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll +++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll @@ -3951,7 +3951,6 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in ; AVX512F-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx) @@ -3962,7 +3961,6 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in ; AVX512DQ-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx) @@ -4004,7 +4002,7 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in ; ; AVX-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16: ; AVX: # %bb.0: -; AVX-NEXT: vbroadcastss (%rdi), %ymm0 +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,0,0,0] ; AVX-NEXT: vpaddb 48(%rsi), %xmm0, %xmm1 ; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2 ; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm3 @@ -4013,7 +4011,6 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in ; AVX-NEXT: vmovdqa %xmm3, 16(%rdx) ; AVX-NEXT: vmovdqa %xmm2, 32(%rdx) ; AVX-NEXT: vmovdqa %xmm1, 48(%rdx) -; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX2-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16: @@ -4029,7 +4026,6 @@ define void 
@vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in ; AVX512F-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx) @@ -4040,7 +4036,6 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in ; AVX512DQ-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx) @@ -4082,7 +4077,7 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.e ; ; AVX-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8: ; AVX: # %bb.0: -; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1] ; AVX-NEXT: vpaddb 48(%rsi), %xmm0, %xmm1 ; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2 ; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm3 @@ -4091,7 +4086,6 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.e ; AVX-NEXT: vmovdqa %xmm3, 16(%rdx) ; AVX-NEXT: vmovdqa %xmm2, 32(%rdx) ; AVX-NEXT: vmovdqa %xmm1, 48(%rdx) -; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX2-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8: @@ -4107,7 +4101,6 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.e ; AVX512F-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx) @@ -4118,7 
+4111,6 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.e ; AVX512DQ-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx) @@ -4184,7 +4176,6 @@ define void @vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4(ptr %i ; AVX512F-LABEL: vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx) @@ -4195,7 +4186,6 @@ define void @vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4(ptr %i ; AVX512DQ-LABEL: vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx) @@ -4338,7 +4328,6 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i ; AVX512F-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx) @@ -4349,7 +4338,6 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i ; AVX512DQ-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, 
%zmm0, %zmm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx) @@ -4418,7 +4406,6 @@ define void @vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8(ptr %in. ; AVX512F-LABEL: vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx) @@ -4429,7 +4416,6 @@ define void @vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8(ptr %in. ; AVX512DQ-LABEL: vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx) @@ -4497,7 +4483,6 @@ define void @vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4(ptr %i ; AVX512F-LABEL: vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx) @@ -4508,7 +4493,6 @@ define void @vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4(ptr %i ; AVX512DQ-LABEL: vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx) @@ -4654,7 +4638,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in. 
; ; AVX512F-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd (%rdi), %zmm0 +; AVX512F-NEXT: vpbroadcastd (%rdi), %ymm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx) @@ -4664,7 +4648,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in. ; ; AVX512DQ-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastd (%rdi), %zmm0 +; AVX512DQ-NEXT: vpbroadcastd (%rdi), %ymm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx) @@ -4731,7 +4715,7 @@ define void @vec512_i32_widen_to_i128_factor4_broadcast_to_v4i128_factor4(ptr %i ; ; AVX512F-LABEL: vec512_i32_widen_to_i128_factor4_broadcast_to_v4i128_factor4: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd (%rdi), %zmm0 +; AVX512F-NEXT: vpbroadcastd (%rdi), %ymm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx) @@ -4741,7 +4725,7 @@ define void @vec512_i32_widen_to_i128_factor4_broadcast_to_v4i128_factor4(ptr %i ; ; AVX512DQ-LABEL: vec512_i32_widen_to_i128_factor4_broadcast_to_v4i128_factor4: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastd (%rdi), %zmm0 +; AVX512DQ-NEXT: vpbroadcastd (%rdi), %ymm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx) @@ -4886,7 +4870,7 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i ; ; AVX512F-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastq (%rdi), %zmm0 +; AVX512F-NEXT: vpbroadcastq (%rdi), %ymm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512F-NEXT: vmovdqa %ymm0, 
(%rdx) @@ -4896,7 +4880,7 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i ; ; AVX512DQ-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastq (%rdi), %zmm0 +; AVX512DQ-NEXT: vpbroadcastq (%rdi), %ymm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1 ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0 ; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx) diff --git a/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll b/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll index 40ad731..fcde7ff 100644 --- a/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll +++ b/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll @@ -134,9 +134,8 @@ define <16 x i32> @test_broadcast_4i32_16i32(ptr%p) nounwind { define <32 x i16> @test_broadcast_8i16_32i16(ptr%p) nounwind { ; X64-AVX512VL-LABEL: test_broadcast_8i16_32i16: ; X64-AVX512VL: ## %bb.0: -; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] ; X64-AVX512VL-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 -; X64-AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; X64-AVX512VL-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; X64-AVX512VL-NEXT: retq @@ -149,9 +148,8 @@ define <32 x i16> @test_broadcast_8i16_32i16(ptr%p) nounwind { ; ; X64-AVX512DQVL-LABEL: test_broadcast_8i16_32i16: ; X64-AVX512DQVL: ## %bb.0: -; X64-AVX512DQVL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] ; X64-AVX512DQVL-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 -; X64-AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; X64-AVX512DQVL-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; X64-AVX512DQVL-NEXT: retq @@ -164,9 +162,8 @@ define <32 x i16> 
@test_broadcast_8i16_32i16(ptr%p) nounwind { define <64 x i8> @test_broadcast_16i8_64i8(ptr%p) nounwind { ; X64-AVX512VL-LABEL: test_broadcast_16i8_64i8: ; X64-AVX512VL: ## %bb.0: -; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] ; X64-AVX512VL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 -; X64-AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; X64-AVX512VL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; X64-AVX512VL-NEXT: retq @@ -179,9 +176,8 @@ define <64 x i8> @test_broadcast_16i8_64i8(ptr%p) nounwind { ; ; X64-AVX512DQVL-LABEL: test_broadcast_16i8_64i8: ; X64-AVX512DQVL: ## %bb.0: -; X64-AVX512DQVL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] ; X64-AVX512DQVL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 -; X64-AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; X64-AVX512DQVL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; X64-AVX512DQVL-NEXT: retq |