aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2014-12-10 10:00:50 +0100
committerJakub Jelinek <jakub@gcc.gnu.org>2014-12-10 10:00:50 +0100
commit092444af0474ed194abe53ea7cbd950dd7e2cdbd (patch)
tree3150dd9808bf96ca60db0afc0b8e10d9b8d861d3
parentf2864cc4e11bd3a72efbf44b5f77eddd0bdeb37c (diff)
downloadgcc-092444af0474ed194abe53ea7cbd950dd7e2cdbd.zip
gcc-092444af0474ed194abe53ea7cbd950dd7e2cdbd.tar.gz
gcc-092444af0474ed194abe53ea7cbd950dd7e2cdbd.tar.bz2
re PR target/63594 (ICE: in ix86_vector_duplicate_value, at config/i386/i386.c:39831 with -mavx512f)
PR target/63594 * config/i386/sse.md (vec_dupv4sf): Move after <mask_codefor><avx512>_vec_dup_gpr<mode><mask_name> pattern. (*vec_dupv4si, *vec_dupv2di): Likewise. (<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>): Merge into ... (<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>): ... this pattern. (*vec_dup<mode> AVX2_VEC_DUP_MODE splitter): Disable for TARGET_AVX512VL (for QI/HI scalar modes only if TARGET_AVX512BW is set too). * config/i386/i386.c (enum ix86_builtins): Remove IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, IX86_BUILTIN_PBROADCASTQ128_MEM_MASK and IX86_BUILTIN_PBROADCASTQ512_MEM. (bdesc_args): Use __builtin_ia32_pbroadcastq512_gpr_mask, __builtin_ia32_pbroadcastq256_gpr_mask and __builtin_ia32_pbroadcastq128_gpr_mask instead of *_mem_mask regardless of OPTION_MASK_ISA_64BIT. * config/i386/avx512fintrin.h (_mm512_set1_epi64, _mm512_mask_set1_epi64, _mm512_maskz_set1_epi64): Use *_gpr_mask builtins regardless of whether TARGET_64BIT is defined or not. * config/i386/avx512vlintrin.h (_mm256_mask_set1_epi64, _mm256_maskz_set1_epi64, _mm_mask_set1_epi64, _mm_maskz_set1_epi64): Likewise. From-SVN: r218565
-rw-r--r--gcc/ChangeLog25
-rw-r--r--gcc/config/i386/avx512fintrin.h19
-rw-r--r--gcc/config/i386/avx512vlintrin.h24
-rw-r--r--gcc/config/i386/i386.c12
-rw-r--r--gcc/config/i386/sse.md142
5 files changed, 95 insertions, 127 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 997e38e..6300829 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,30 @@
2014-12-10 Jakub Jelinek <jakub@redhat.com>
+ PR target/63594
+ * config/i386/sse.md (vec_dupv4sf): Move after
+ <mask_codefor><avx512>_vec_dup_gpr<mode><mask_name> pattern.
+ (*vec_dupv4si, *vec_dupv2di): Likewise.
+ (<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>): Merge into ...
+ (<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>): ... this
+ pattern.
+ (*vec_dup<mode> AVX2_VEC_DUP_MODE splitter): Disable for
+ TARGET_AVX512VL (for QI/HI scalar modes only if TARGET_AVX512BW
+ is set too).
+ * config/i386/i386.c (enum ix86_builtins): Remove
+ IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
+ IX86_BUILTIN_PBROADCASTQ128_MEM_MASK and
+ IX86_BUILTIN_PBROADCASTQ512_MEM.
+ (bdesc_args): Use __builtin_ia32_pbroadcastq512_gpr_mask,
+ __builtin_ia32_pbroadcastq256_gpr_mask and
+ __builtin_ia32_pbroadcastq128_gpr_mask instead of *_mem_mask
+ regardless of OPTION_MASK_ISA_64BIT.
+ * config/i386/avx512fintrin.h (_mm512_set1_epi64,
+ _mm512_mask_set1_epi64, _mm512_maskz_set1_epi64): Use *_gpr_mask
+ builtins regardless of whether TARGET_64BIT is defined or not.
+ * config/i386/avx512vlintrin.h (_mm256_mask_set1_epi64,
+ _mm256_maskz_set1_epi64, _mm_mask_set1_epi64, _mm_maskz_set1_epi64):
+ Likewise.
+
* config/i386/sse.md (*mov<mode>_internal, *avx512f_gatherdi<mode>_2):
Use <MODE_SIZE> instead of GET_MODE_SIZE (<MODE>mode).
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 66f5199..b4842b2 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -3603,47 +3603,28 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi64 (long long __A)
{
-#ifdef TARGET_64BIT
return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
(__v8di)
_mm512_undefined_si512 (),
(__mmask8)(-1));
-#else
- return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8)(-1));
-#endif
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
-#ifdef TARGET_64BIT
return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
__M);
-#else
- return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O,
- __M);
-#endif
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
{
-#ifdef TARGET_64BIT
return (__m512i)
__builtin_ia32_pbroadcastq512_gpr_mask (__A,
(__v8di) _mm512_setzero_si512 (),
__M);
-#else
- return (__m512i)
- __builtin_ia32_pbroadcastq512_mem_mask (__A,
- (__v8di) _mm512_setzero_si512 (),
- __M);
-#endif
}
extern __inline __m512
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index a042e8c..56aaa3e 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -2642,30 +2642,18 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
{
-#ifdef TARGET_64BIT
return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
__M);
-#else
- return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A, (__v4di) __O,
- __M);
-#endif
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
{
-#ifdef TARGET_64BIT
return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
(__v4di)
_mm256_setzero_si256 (),
__M);
-#else
- return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A,
- (__v4di)
- _mm256_setzero_si256 (),
- __M);
-#endif
}
extern __inline __m128i
@@ -2691,30 +2679,18 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
{
-#ifdef TARGET_64BIT
return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
__M);
-#else
- return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A, (__v2di) __O,
- __M);
-#endif
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
{
-#ifdef TARGET_64BIT
return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
(__v2di)
_mm_setzero_si128 (),
__M);
-#else
- return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A,
- (__v2di)
- _mm_setzero_si128 (),
- __M);
-#endif
}
extern __inline __m256
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 1e1716e..9fe69cc 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -28823,7 +28823,6 @@ enum ix86_builtins
IX86_BUILTIN_PBROADCASTMW512,
IX86_BUILTIN_PBROADCASTQ512,
IX86_BUILTIN_PBROADCASTQ512_GPR,
- IX86_BUILTIN_PBROADCASTQ512_MEM,
IX86_BUILTIN_PCMPEQD512_MASK,
IX86_BUILTIN_PCMPEQQ512_MASK,
IX86_BUILTIN_PCMPGTD512_MASK,
@@ -29261,10 +29260,8 @@ enum ix86_builtins
IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
IX86_BUILTIN_PBROADCASTQ256_MASK,
IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
- IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
IX86_BUILTIN_PBROADCASTQ128_MASK,
IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
- IX86_BUILTIN_PBROADCASTQ128_MEM_MASK,
IX86_BUILTIN_BROADCASTSS256,
IX86_BUILTIN_BROADCASTSS128,
IX86_BUILTIN_BROADCASTSD256,
@@ -31803,8 +31800,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
{ OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
- { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
- { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
@@ -32078,11 +32074,9 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
- { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
- { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
- { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
- { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0075fe2..8548e16 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6319,22 +6319,6 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "vec_dupv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
- (vec_duplicate:V4SF
- (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
- "TARGET_SSE"
- "@
- vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
- vbroadcastss\t{%1, %0|%0, %1}
- shufps\t{$0, %0, %0|%0, %0, 0}"
- [(set_attr "isa" "avx,avx,noavx")
- (set_attr "type" "sseshuf1,ssemov,sseshuf1")
- (set_attr "length_immediate" "1,0,1")
- (set_attr "prefix_extra" "0,1,*")
- (set_attr "prefix" "vex,vex,orig")
- (set_attr "mode" "V4SF")])
-
;; Although insertps takes register source, we prefer
;; unpcklps with register source since it is shorter.
(define_insn "*vec_concatv2sf_sse4_1"
@@ -12821,37 +12805,6 @@
operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
})
-(define_insn "*vec_dupv4si"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
- (vec_duplicate:V4SI
- (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
- "TARGET_SSE"
- "@
- %vpshufd\t{$0, %1, %0|%0, %1, 0}
- vbroadcastss\t{%1, %0|%0, %1}
- shufps\t{$0, %0, %0|%0, %0, 0}"
- [(set_attr "isa" "sse2,avx,noavx")
- (set_attr "type" "sselog1,ssemov,sselog1")
- (set_attr "length_immediate" "1,0,1")
- (set_attr "prefix_extra" "0,1,*")
- (set_attr "prefix" "maybe_vex,vex,orig")
- (set_attr "mode" "TI,V4SF,V4SF")])
-
-(define_insn "*vec_dupv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
- (vec_duplicate:V2DI
- (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
- "TARGET_SSE"
- "@
- punpcklqdq\t%0, %0
- vpunpcklqdq\t{%d1, %0|%0, %d1}
- %vmovddup\t{%1, %0|%0, %1}
- movlhps\t%0, %0"
- [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
- (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
- (set_attr "prefix" "orig,vex,maybe_vex,orig")
- (set_attr "mode" "TI,TI,DF,V4SF")])
-
(define_insn "*vec_concatv2si_sse4_1"
[(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,x, Yr,*x,x, x, *y,*y")
(vec_concat:V2SI
@@ -16665,46 +16618,78 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+ [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
(vec_duplicate:VI12_AVX512VL
- (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
"TARGET_AVX512BW"
- "vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
+ "@
+ vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
+ vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
- [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
- (vec_duplicate:VI48_AVX512VL
- (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
- "TARGET_AVX512F && (<ssescalarmode>mode != DImode || TARGET_64BIT)"
-{
- return "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
-}
- [(set_attr "type" "ssemov")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>"
- [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
+ [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
(vec_duplicate:V48_AVX512VL
- (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
"TARGET_AVX512F"
"v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
+ (set_attr "mode" "<sseinsnmode>")
+ (set (attr "enabled")
+ (if_then_else (eq_attr "alternative" "1")
+ (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
+ && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
+ (const_int 1)))])
-(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
- (vec_duplicate:VI12_AVX512VL
- (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512BW"
- "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
+(define_insn "vec_dupv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
+ "TARGET_SSE"
+ "@
+ vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
+ vbroadcastss\t{%1, %0|%0, %1}
+ shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "isa" "avx,avx,noavx")
+ (set_attr "type" "sseshuf1,ssemov,sseshuf1")
+ (set_attr "length_immediate" "1,0,1")
+ (set_attr "prefix_extra" "0,1,*")
+ (set_attr "prefix" "vex,vex,orig")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*vec_dupv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
+ "TARGET_SSE"
+ "@
+ %vpshufd\t{$0, %1, %0|%0, %1, 0}
+ vbroadcastss\t{%1, %0|%0, %1}
+ shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "isa" "sse2,avx,noavx")
+ (set_attr "type" "sselog1,ssemov,sselog1")
+ (set_attr "length_immediate" "1,0,1")
+ (set_attr "prefix_extra" "0,1,*")
+ (set_attr "prefix" "maybe_vex,vex,orig")
+ (set_attr "mode" "TI,V4SF,V4SF")])
+
+(define_insn "*vec_dupv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
+ (vec_duplicate:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
+ "TARGET_SSE"
+ "@
+ punpcklqdq\t%0, %0
+ vpunpcklqdq\t{%d1, %0|%0, %d1}
+ %vmovddup\t{%1, %0|%0, %1}
+ movlhps\t%0, %0"
+ [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
+ (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
+ (set_attr "prefix" "orig,vex,maybe_vex,orig")
+ (set_attr "mode" "TI,TI,DF,V4SF")])
(define_insn "avx2_vbroadcasti128_<mode>"
[(set (match_operand:VI_256 0 "register_operand" "=x")
@@ -16759,7 +16744,14 @@
[(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
(vec_duplicate:AVX2_VEC_DUP_MODE
(match_operand:<ssescalarmode> 1 "register_operand")))]
- "TARGET_AVX2 && reload_completed && GENERAL_REG_P (operands[1])"
+ "TARGET_AVX2
+ /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
+ available, because then we can broadcast from GPRs directly.
+ For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
+ for V*SI mode it requires just -mavx512vl. */
+ && !(TARGET_AVX512VL
+ && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
+ && reload_completed && GENERAL_REG_P (operands[1])"
[(const_int 0)]
{
emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),