diff options
author | Ilya Enkovich <ilya.enkovich@intel.com> | 2014-12-03 08:45:45 +0000 |
---|---|---|
committer | Ilya Enkovich <ienkovich@gcc.gnu.org> | 2014-12-03 08:45:45 +0000 |
commit | 45392c76508048665ea5ad3269b59894724d5cb5 (patch) | |
tree | ad09dd9e85bd4fb92af58f56cccbbc9c0eb7429a /gcc | |
parent | 17adbcebf01689d2f2c5386845398d9aedbe4cb3 (diff) | |
download | gcc-45392c76508048665ea5ad3269b59894724d5cb5.zip gcc-45392c76508048665ea5ad3269b59894724d5cb5.tar.gz gcc-45392c76508048665ea5ad3269b59894724d5cb5.tar.bz2 |
constraints.md (Yr): New.
gcc/
* config/i386/constraints.md (Yr): New.
* config/i386/i386.h (reg_class): Add NO_REX_SSE_REGS.
(REG_CLASS_NAMES): Likewise.
(REG_CLASS_CONTENTS): Likewise.
* config/i386/sse.md (*vec_concatv2sf_sse4_1): Add alternatives
which use only NO_REX_SSE_REGS.
(vec_set<mode>_0): Likewise.
(*vec_setv4sf_sse4_1): Likewise.
(sse4_1_insertps): Likewise.
(*sse4_1_extractps): Likewise.
(*sse4_1_mulv2siv2di3<mask_name>): Likewise.
(*<sse4_1_avx2>_mul<mode>3<mask_name>): Likewise.
(*sse4_1_<code><mode>3<mask_name>): Likewise.
(*sse4_1_<code><mode>3): Likewise.
(*sse4_1_eqv2di3): Likewise.
(sse4_2_gtv2di3): Likewise.
(*vec_extractv4si): Likewise.
(*vec_concatv2si_sse4_1): Likewise.
(vec_concatv2di): Likewise.
(<sse4_1>_blend<ssemodesuffix><avxsizesuffix>): Likewise.
(<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>): Likewise.
(<sse4_1>_dp<ssemodesuffix><avxsizesuffix>): Likewise.
(<vi8_sse4_1_avx2_avx512>_movntdqa): Likewise.
(<sse4_1_avx2>_mpsadbw): Likewise.
(<sse4_1_avx2>packusdw<mask_name>): Likewise.
(<sse4_1_avx2>_pblendvb): Likewise.
(sse4_1_pblendw): Likewise.
(sse4_1_phminposuw): Likewise.
(sse4_1_<code>v8qiv8hi2<mask_name>): Likewise.
(sse4_1_<code>v4qiv4si2<mask_name>): Likewise.
(sse4_1_<code>v4hiv4si2<mask_name>): Likewise.
(sse4_1_<code>v2qiv2di2<mask_name>): Likewise.
(sse4_1_<code>v2hiv2di2<mask_name>): Likewise.
(sse4_1_<code>v2siv2di2<mask_name>): Likewise.
(sse4_1_ptest): Likewise.
(<sse4_1>_round<ssemodesuffix><avxsizesuffix>): Likewise.
(sse4_1_round<ssescalarmodesuffix>): Likewise.
* config/i386/subst.md (mask_prefix4): New.
* config/i386/x86-tune.def (X86_TUNE_AVOID_4BYTE_PREFIXES): New.
gcc/testsuite/
* gcc.target/i386/sse2-init-v2di-2.c: Adjust to changed
vec_concatv2di template.
From-SVN: r218303
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 42 | ||||
-rw-r--r-- | gcc/config/i386/constraints.md | 6 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 3 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 393 | ||||
-rw-r--r-- | gcc/config/i386/subst.md | 1 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune.def | 4 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c | 2 |
8 files changed, 271 insertions, 185 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e3b3477..85a68b9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,45 @@ +2014-12-03 Ilya Enkovich <ilya.enkovich@intel.com> + + * config/i386/constraints.md (Yr): New. + * config/i386/i386.h (reg_class): Add NO_REX_SSE_REGS. + (REG_CLASS_NAMES): Likewise. + (REG_CLASS_CONTENTS): Likewise. + * config/i386/sse.md (*vec_concatv2sf_sse4_1): Add alternatives + which use only NO_REX_SSE_REGS. + (vec_set<mode>_0): Likewise. + (*vec_setv4sf_sse4_1): Likewise. + (sse4_1_insertps): Likewise. + (*sse4_1_extractps): Likewise. + (*sse4_1_mulv2siv2di3<mask_name>): Likewise. + (*<sse4_1_avx2>_mul<mode>3<mask_name>): Likewise. + (*sse4_1_<code><mode>3<mask_name>): Likewise. + (*sse4_1_<code><mode>3): Likewise. + (*sse4_1_eqv2di3): Likewise. + (sse4_2_gtv2di3): Likewise. + (*vec_extractv4si): Likewise. + (*vec_concatv2si_sse4_1): Likewise. + (vec_concatv2di): Likewise. + (<sse4_1>_blend<ssemodesuffix><avxsizesuffix>): Likewise. + (<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>): Likewise. + (<sse4_1>_dp<ssemodesuffix><avxsizesuffix>): Likewise. + (<vi8_sse4_1_avx2_avx512>_movntdqa): Likewise. + (<sse4_1_avx2>_mpsadbw): Likewise. + (<sse4_1_avx2>packusdw<mask_name>): Likewise. + (<sse4_1_avx2>_pblendvb): Likewise. + (sse4_1_pblendw): Likewise. + (sse4_1_phminposuw): Likewise. + (sse4_1_<code>v8qiv8hi2<mask_name>): Likewise. + (sse4_1_<code>v4qiv4si2<mask_name>): Likewise. + (sse4_1_<code>v4hiv4si2<mask_name>): Likewise. + (sse4_1_<code>v2qiv2di2<mask_name>): Likewise. + (sse4_1_<code>v2hiv2di2<mask_name>): Likewise. + (sse4_1_<code>v2siv2di2<mask_name>): Likewise. + (sse4_1_ptest): Likewise. + (<sse4_1>_round<ssemodesuffix><avxsizesuffix>): Likewise. + (sse4_1_round<ssescalarmodesuffix>): Likewise. + * config/i386/subst.md (mask_prefix4): New. + * config/i386/x86-tune.def (X86_TUNE_AVOID_4BYTE_PREFIXES): New. 
+ 2014-12-03 Segher Boessenkool <segher@kernel.crashing.org> PR rtl-optimization/52714 diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index b7183a1..c8093f5 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -106,6 +106,8 @@ ;; a Integer register when zero extensions with AND are disabled ;; p Integer register when TARGET_PARTIAL_REG_STALL is disabled ;; f x87 register when 80387 floating point arithmetic is enabled +;; r SSE regs not requiring REX prefix when prefixes avoidance is enabled +;; and all SSE regs otherwise (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS" "First SSE register (@code{%xmm0}).") @@ -139,6 +141,10 @@ "(ix86_fpmath & FPMATH_387) ? FLOAT_REGS : NO_REGS" "@internal Any x87 register when 80387 FP arithmetic is enabled.") +(define_register_constraint "Yr" + "TARGET_SSE ? (X86_TUNE_AVOID_4BYTE_PREFIXES ? NO_REX_SSE_REGS : ALL_SSE_REGS) : NO_REGS" + "@internal Lower SSE register when avoiding REX prefix and all SSE registers otherwise.") + ;; We use the B prefix to denote any number of internal operands: ;; s Sibcall memory operand, not valid for TARGET_X32 ;; w Call memory operand, not valid for TARGET_X32 diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index bc76620..df7789d 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1311,6 +1311,7 @@ enum reg_class FP_TOP_REG, FP_SECOND_REG, /* %st(0) %st(1) */ FLOAT_REGS, SSE_FIRST_REG, + NO_REX_SSE_REGS, SSE_REGS, EVEX_SSE_REGS, BND_REGS, @@ -1369,6 +1370,7 @@ enum reg_class "FP_TOP_REG", "FP_SECOND_REG", \ "FLOAT_REGS", \ "SSE_FIRST_REG", \ + "NO_REX_SSE_REGS", \ "SSE_REGS", \ "EVEX_SSE_REGS", \ "BND_REGS", \ @@ -1409,6 +1411,7 @@ enum reg_class { 0x0200, 0x0, 0x0 }, /* FP_SECOND_REG */ \ { 0xff00, 0x0, 0x0 }, /* FLOAT_REGS */ \ { 0x200000, 0x0, 0x0 }, /* SSE_FIRST_REG */ \ +{ 0x1fe00000, 0x000000, 0x0 }, /* NO_REX_SSE_REGS */ \ { 0x1fe00000, 0x1fe000, 0x0 }, /* SSE_REGS */ \ { 
0x0,0xffe00000, 0x1f }, /* EVEX_SSE_REGS */ \ { 0x0, 0x0,0x1e000 }, /* BND_REGS */ \ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ca5d720..c3aaea3 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -6338,26 +6338,28 @@ ;; Although insertps takes register source, we prefer ;; unpcklps with register source since it is shorter. (define_insn "*vec_concatv2sf_sse4_1" - [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y") + [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x,Yr,*x,x,x,*y ,*y") (vec_concat:V2SF - (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m") - (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))] + (match_operand:SF 1 "nonimmediate_operand" " 0, 0,x, 0,0, x,m, 0 , m") + (match_operand:SF 2 "vector_move_operand" " Yr,*x,x, m,m, m,C,*ym, C")))] "TARGET_SSE4_1" "@ unpcklps\t{%2, %0|%0, %2} + unpcklps\t{%2, %0|%0, %2} vunpcklps\t{%2, %1, %0|%0, %1, %2} insertps\t{$0x10, %2, %0|%0, %2, 0x10} + insertps\t{$0x10, %2, %0|%0, %2, 0x10} vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10} %vmovss\t{%1, %0|%0, %1} punpckldq\t{%2, %0|%0, %2} movd\t{%1, %0|%0, %1}" - [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") - (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") - (set_attr "prefix_data16" "*,*,1,*,*,*,*") - (set_attr "prefix_extra" "*,*,1,1,*,*,*") - (set_attr "length_immediate" "*,*,1,1,*,*,*") - (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig") - (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")]) + [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*") + (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") + (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*") + (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*") + (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*") + (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig") + (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")]) ;; ??? 
In theory we can match memory for the MMX alternative, but allowing ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE @@ -6405,49 +6407,51 @@ ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "vec_set<mode>_0" [(set (match_operand:VI4F_128 0 "nonimmediate_operand" - "=v,v,v ,x,x,v,x ,x ,m ,m ,m") + "=Yr,*v,v,v ,x,x,v,Yr ,*x ,x ,m ,m ,m") (vec_merge:VI4F_128 (vec_duplicate:VI4F_128 (match_operand:<ssescalarmode> 2 "general_operand" - " v,m,*r,m,x,v,*rm,*rm,!x,!*re,!*fF")) + " Yr,*v,m,*r,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF")) (match_operand:VI4F_128 1 "vector_move_operand" - " C,C,C ,C,0,v,0 ,x ,0 ,0 ,0") + " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0") (const_int 1)))] "TARGET_SSE" "@ %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe} + %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe} %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2} %vmovd\t{%2, %0|%0, %2} movss\t{%2, %0|%0, %2} movss\t{%2, %0|%0, %2} vmovss\t{%2, %1, %0|%0, %1, %2} pinsrd\t{$0, %2, %0|%0, %2, 0} + pinsrd\t{$0, %2, %0|%0, %2, 0} vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0} # # #" - [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*") + [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*") (set (attr "type") - (cond [(eq_attr "alternative" "0,6,7") + (cond [(eq_attr "alternative" "0,1,7,8,9") (const_string "sselog") - (eq_attr "alternative" "9") + (eq_attr "alternative" "11") (const_string "imov") - (eq_attr "alternative" "10") + (eq_attr "alternative" "12") (const_string "fmov") ] (const_string "ssemov"))) - (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*") - (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*") - (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*") - (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")]) + (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*") + (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*") + (set_attr "prefix" 
"maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*") + (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")]) ;; A subset is vec_setv4sf. (define_insn "*vec_setv4sf_sse4_1" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") + [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x") (vec_merge:V4SF (vec_duplicate:V4SF - (match_operand:SF 2 "nonimmediate_operand" "xm,xm")) - (match_operand:V4SF 1 "register_operand" "0,x") + (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm")) + (match_operand:V4SF 1 "register_operand" "0,0,x") (match_operand:SI 3 "const_int_operand")))] "TARGET_SSE4_1 && ((unsigned) exact_log2 (INTVAL (operands[3])) @@ -6457,26 +6461,27 @@ switch (which_alternative) { case 0: - return "insertps\t{%3, %2, %0|%0, %2, %3}"; case 1: + return "insertps\t{%3, %2, %0|%0, %2, %3}"; + case 2: return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; default: gcc_unreachable (); } } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "V4SF")]) (define_insn "sse4_1_insertps" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") - (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm") - (match_operand:V4SF 1 "register_operand" "0,x") - (match_operand:SI 3 "const_0_to_255_operand" "n,n")] + [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x") + (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm") + (match_operand:V4SF 1 "register_operand" "0,0,x") + (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")] UNSPEC_INSERTPS))] "TARGET_SSE4_1" { @@ -6490,19 +6495,20 @@ switch (which_alternative) { case 0: - return "insertps\t{%3, %2, %0|%0, %2, %3}"; case 1: + return "insertps\t{%3, %2, %0|%0, %2, %3}"; + case 2: 
return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; default: gcc_unreachable (); } } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "V4SF")]) (define_split @@ -6544,13 +6550,14 @@ }) (define_insn_and_split "*sse4_1_extractps" - [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x") + [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x") (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x,0,x") - (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))] + (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x") + (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))] "TARGET_SSE4_1" "@ %vextractps\t{%2, %1, %0|%0, %1, %2} + %vextractps\t{%2, %1, %0|%0, %1, %2} # #" "&& reload_completed && SSE_REG_P (operands[0])" @@ -6575,13 +6582,13 @@ } DONE; } - [(set_attr "isa" "*,noavx,avx") - (set_attr "type" "sselog,*,*") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix_extra" "1,*,*") - (set_attr "length_immediate" "1,*,*") - (set_attr "prefix" "maybe_vex,*,*") - (set_attr "mode" "V4SF,*,*")]) + [(set_attr "isa" "*,*,noavx,avx") + (set_attr "type" "sselog,sselog,*,*") + (set_attr "prefix_data16" "1,1,*,*") + (set_attr "prefix_extra" "1,1,*,*") + (set_attr "length_immediate" "1,1,*,*") + (set_attr "prefix" "maybe_vex,maybe_vex,*,*") + (set_attr "mode" "V4SF,V4SF,*,*")]) (define_insn_and_split "*vec_extractv4sf_mem" [(set (match_operand:SF 0 "register_operand" "=x,*r,f") @@ -9553,26 +9560,27 @@ "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);") (define_insn "*sse4_1_mulv2siv2di3<mask_name>" - [(set (match_operand:V2DI 0 "register_operand" "=x,v") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v") (mult:V2DI (sign_extend:V2DI 
(vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%0,v") + (match_operand:V4SI 1 "nonimmediate_operand" "%0,0,v") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm") + (match_operand:V4SI 2 "nonimmediate_operand" "Yrm,*xm,vm") (parallel [(const_int 0) (const_int 2)])))))] "TARGET_SSE4_1 && <mask_avx512vl_condition> && ix86_binary_operator_ok (MULT, V4SImode, operands)" "@ pmuldq\t{%2, %0|%0, %2} + pmuldq\t{%2, %0|%0, %2} vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseimul") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) (define_insn "avx512bw_pmaddwd512<mode><mask_name>" @@ -9752,19 +9760,20 @@ }) (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>" - [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v") + [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v") (mult:VI4_AVX512F - (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v") - (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))] + (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,0,v") + (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "Yrm,*xm,vm")))] "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>" "@ pmulld\t{%2, %0|%0, %2} + pmulld\t{%2, %0|%0, %2} vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseimul") (set_attr "prefix_extra" "1") - (set_attr "prefix" "<mask_prefix3>") - (set_attr "btver2_decode" "vector,vector") + (set_attr "prefix" "<mask_prefix4>") + (set_attr "btver2_decode" "vector,vector,vector") (set_attr "mode" "<sseinsnmode>")]) (define_expand 
"mul<mode>3" @@ -10241,20 +10250,21 @@ }) (define_insn "*sse4_1_<code><mode>3<mask_name>" - [(set (match_operand:VI14_128 0 "register_operand" "=x,v") + [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v") (smaxmin:VI14_128 - (match_operand:VI14_128 1 "nonimmediate_operand" "%0,v") - (match_operand:VI14_128 2 "nonimmediate_operand" "xm,vm")))] + (match_operand:VI14_128 1 "nonimmediate_operand" "%0,0,v") + (match_operand:VI14_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))] "TARGET_SSE4_1 && <mask_mode512bit_condition> && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} + p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1,*") - (set_attr "prefix" "orig,vex") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) (define_insn "*<code>v8hi3" @@ -10324,20 +10334,21 @@ }) (define_insn "*sse4_1_<code><mode>3<mask_name>" - [(set (match_operand:VI24_128 0 "register_operand" "=x,v") + [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v") (umaxmin:VI24_128 - (match_operand:VI24_128 1 "nonimmediate_operand" "%0,v") - (match_operand:VI24_128 2 "nonimmediate_operand" "xm,vm")))] + (match_operand:VI24_128 1 "nonimmediate_operand" "%0,0,v") + (match_operand:VI24_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))] "TARGET_SSE4_1 && <mask_mode512bit_condition> && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} + p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1,*") - (set_attr "prefix" "orig,vex") + 
(set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) (define_insn "*<code>v16qi3" @@ -10427,18 +10438,19 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "*sse4_1_eqv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x") (eq:V2DI - (match_operand:V2DI 1 "nonimmediate_operand" "%0,x") - (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:V2DI 1 "nonimmediate_operand" "%0,0,x") + (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))] "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)" "@ pcmpeqq\t{%2, %0|%0, %2} + pcmpeqq\t{%2, %0|%0, %2} vpcmpeqq\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssecmp") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) (define_insn "*sse2_eq<mode>3" @@ -10474,18 +10486,19 @@ "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);") (define_insn "sse4_2_gtv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x") (gt:V2DI - (match_operand:V2DI 1 "register_operand" "0,x") - (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:V2DI 1 "register_operand" "0,0,x") + (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))] "TARGET_SSE4_2" "@ pcmpgtq\t{%2, %0|%0, %2} + pcmpgtq\t{%2, %0|%0, %2} vpcmpgtq\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssecmp") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) (define_insn "avx2_gt<mode>3" @@ -12705,9 +12718,9 @@ "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));") (define_insn "*vec_extractv4si" - [(set (match_operand:SI 0 "nonimmediate_operand" 
"=rm,x,x") + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x") (vec_select:SI - (match_operand:V4SI 1 "register_operand" "x,0,x") + (match_operand:V4SI 1 "register_operand" "x,0,0,x") (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))] "TARGET_SSE4_1" { @@ -12717,10 +12730,11 @@ return "%vpextrd\t{%2, %1, %0|%0, %1, %2}"; case 1: + case 2: operands [2] = GEN_INT (INTVAL (operands[2]) * 4); return "psrldq\t{%2, %0|%0, %2}"; - case 2: + case 3: operands [2] = GEN_INT (INTVAL (operands[2]) * 4); return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; @@ -12728,11 +12742,11 @@ gcc_unreachable (); } } - [(set_attr "isa" "*,noavx,avx") - (set_attr "type" "sselog1,sseishft1,sseishft1") - (set_attr "prefix_extra" "1,*,*") + [(set_attr "isa" "*,noavx,noavx,avx") + (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1") + (set_attr "prefix_extra" "1,*,*,*") (set_attr "length_immediate" "1") - (set_attr "prefix" "maybe_vex,orig,vex") + (set_attr "prefix" "maybe_vex,orig,orig,vex") (set_attr "mode" "TI")]) (define_insn "*vec_extractv4si_zext" @@ -12839,25 +12853,27 @@ (set_attr "mode" "TI,TI,DF,V4SF")]) (define_insn "*vec_concatv2si_sse4_1" - [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y") + [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,x, Yr,*x,x, x, *y,*y") (vec_concat:V2SI - (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm") - (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))] + (match_operand:SI 1 "nonimmediate_operand" " 0, 0,x, 0,0, x,rm, 0,rm") + (match_operand:SI 2 "vector_move_operand" " rm,rm,rm,Yr,*x,x, C,*ym, C")))] "TARGET_SSE4_1" "@ pinsrd\t{$1, %2, %0|%0, %2, 1} + pinsrd\t{$1, %2, %0|%0, %2, 1} vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1} punpckldq\t{%2, %0|%0, %2} + punpckldq\t{%2, %0|%0, %2} vpunpckldq\t{%2, %1, %0|%0, %1, %2} %vmovd\t{%1, %0|%0, %1} punpckldq\t{%2, %0|%0, %2} movd\t{%1, %0|%0, %1}" - [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") - (set_attr "type" 
"sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") - (set_attr "prefix_extra" "1,1,*,*,*,*,*") - (set_attr "length_immediate" "1,1,*,*,*,*,*") - (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig") - (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")]) + [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*") + (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") + (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*") + (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*") + (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig") + (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")]) ;; ??? In theory we can match memory for the MMX alternative, but allowing ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE @@ -12900,15 +12916,16 @@ ;; movd instead of movq is required to handle broken assemblers. (define_insn "vec_concatv2di" [(set (match_operand:V2DI 0 "register_operand" - "=x,x ,Yi,x ,!x,x,x,x,x,x") + "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x") (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" - " 0,x ,r ,xm,*y,0,x,0,0,x") + " 0, 0,x ,r ,xm,*y,0,x,0,0,x") (match_operand:DI 2 "vector_move_operand" - "rm,rm,C ,C ,C ,x,x,x,m,m")))] + "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))] "TARGET_SSE" "@ pinsrq\t{$1, %2, %0|%0, %2, 1} + pinsrq\t{$1, %2, %0|%0, %2, 1} vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1} * return HAVE_AS_IX86_INTERUNIT_MOVQ ? 
\"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\"; %vmovq\t{%1, %0|%0, %1} @@ -12918,17 +12935,17 @@ movlhps\t{%2, %0|%0, %2} movhps\t{%2, %0|%0, %2} vmovhps\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx") + [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx") (set (attr "type") (if_then_else - (eq_attr "alternative" "0,1,5,6") + (eq_attr "alternative" "0,1,2,6,7") (const_string "sselog") (const_string "ssemov"))) - (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*") - (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*") - (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*") - (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex") - (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")]) + (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*") + (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*") + (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*") + (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex") + (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")]) (define_expand "vec_unpacks_lo_<mode>" [(match_operand:<sseunpackmode> 0 "register_operand") @@ -13968,61 +13985,64 @@ [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")]) (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>" - [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") + [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") (vec_merge:VF_128_256 - (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm") - (match_operand:VF_128_256 1 "register_operand" "0,x") + (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm") + (match_operand:VF_128_256 1 "register_operand" "0,0,x") (match_operand:SI 3 "const_0_to_<blendbits>_operand")))] "TARGET_SSE4_1" "@ blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - 
[(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") (set_attr "length_immediate" "1") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "<MODE>")]) (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>" - [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") + [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") (unspec:VF_128_256 - [(match_operand:VF_128_256 1 "register_operand" "0,x") - (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm") - (match_operand:VF_128_256 3 "register_operand" "Yz,x")] + [(match_operand:VF_128_256 1 "register_operand" "0,0,x") + (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm") + (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")] UNSPEC_BLENDV))] "TARGET_SSE4_1" "@ blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") (set_attr "length_immediate" "1") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "btver2_decode" "vector,vector") + (set_attr "prefix" "orig,orig,vex") + (set_attr "btver2_decode" "vector,vector,vector") (set_attr "mode" "<MODE>")]) (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>" - [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") + [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") (unspec:VF_128_256 - [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x") - (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm") - (match_operand:SI 3 "const_0_to_255_operand" "n,n")] + [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,x") + 
(match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm") + (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")] UNSPEC_DP))] "TARGET_SSE4_1" "@ dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemul") (set_attr "length_immediate" "1") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "btver2_decode" "vector,vector") + (set_attr "prefix" "orig,orig,vex") + (set_attr "btver2_decode" "vector,vector,vector") (set_attr "mode" "<MODE>")]) ;; Mode attribute used by `vmovntdqa' pattern @@ -14030,86 +14050,90 @@ [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")]) (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa" - [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=x, v") - (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m")] + [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v") + (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")] UNSPEC_MOVNTDQA))] "TARGET_SSE4_1" "%vmovntdqa\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1, *") - (set_attr "prefix" "maybe_vex, evex") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "maybe_vex,maybe_vex,evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<sse4_1_avx2>_mpsadbw" - [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x") (unspec:VI1_AVX2 - [(match_operand:VI1_AVX2 1 "register_operand" "0,x") - (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm") - (match_operand:SI 3 "const_0_to_255_operand" "n,n")] + [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x") + (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm") + (match_operand:SI 
3 "const_0_to_255_operand" "n,n,n")] UNSPEC_MPSADBW))] "TARGET_SSE4_1" "@ mpsadbw\t{%3, %2, %0|%0, %2, %3} + mpsadbw\t{%3, %2, %0|%0, %2, %3} vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sselog1") (set_attr "length_immediate" "1") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "btver2_decode" "vector,vector") + (set_attr "prefix" "orig,orig,vex") + (set_attr "btver2_decode" "vector,vector,vector") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<sse4_1_avx2>_packusdw<mask_name>" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v") (vec_concat:VI2_AVX2 (us_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 1 "register_operand" "0,v")) + (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v")) (us_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))] + (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "Yrm,*xm,vm"))))] "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" "@ packusdw\t{%2, %0|%0, %2} + packusdw\t{%2, %0|%0, %2} vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sselog") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,maybe_evex") + (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<sse4_1_avx2>_pblendvb" - [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x") (unspec:VI1_AVX2 - [(match_operand:VI1_AVX2 1 "register_operand" "0,x") - (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm") - (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")] + [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x") + (match_operand:VI1_AVX2 2 "nonimmediate_operand" 
"Yrm,*xm,xm") + (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")] UNSPEC_BLENDV))] "TARGET_SSE4_1" "@ pblendvb\t{%3, %2, %0|%0, %2, %3} + pblendvb\t{%3, %2, %0|%0, %2, %3} vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "*,1") - (set_attr "prefix" "orig,vex") - (set_attr "btver2_decode" "vector,vector") + (set_attr "length_immediate" "*,*,1") + (set_attr "prefix" "orig,orig,vex") + (set_attr "btver2_decode" "vector,vector,vector") (set_attr "mode" "<sseinsnmode>")]) (define_insn "sse4_1_pblendw" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") + [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x") (vec_merge:V8HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") - (match_operand:V8HI 1 "register_operand" "0,x") - (match_operand:SI 3 "const_0_to_255_operand" "n,n")))] + (match_operand:V8HI 2 "nonimmediate_operand" "Yrm,*xm,xm") + (match_operand:V8HI 1 "register_operand" "0,0,x") + (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))] "TARGET_SSE4_1" "@ pblendw\t{%3, %2, %0|%0, %2, %3} + pblendw\t{%3, %2, %0|%0, %2, %3} vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) ;; The builtin uses an 8-bit immediate. Expand that. 
@@ -14157,8 +14181,8 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "sse4_1_phminposuw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")] + [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x") + (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm")] UNSPEC_PHMINPOSUW))] "TARGET_SSE4_1" "%vphminposuw\t{%1, %0|%0, %1}" @@ -14190,10 +14214,10 @@ (set_attr "mode" "XI")]) (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>" - [(set (match_operand:V8HI 0 "register_operand" "=v") + [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v") (any_extend:V8HI (vec_select:V8QI - (match_operand:V16QI 1 "nonimmediate_operand" "vm") + (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3) (const_int 4) (const_int 5) @@ -14233,10 +14257,10 @@ (set_attr "mode" "OI")]) (define_insn "sse4_1_<code>v4qiv4si2<mask_name>" - [(set (match_operand:V4SI 0 "register_operand" "=v") + [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v") (any_extend:V4SI (vec_select:V4QI - (match_operand:V16QI 1 "nonimmediate_operand" "vm") + (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))] "TARGET_SSE4_1 && <mask_avx512vl_condition>" @@ -14269,10 +14293,10 @@ (set_attr "mode" "OI")]) (define_insn "sse4_1_<code>v4hiv4si2<mask_name>" - [(set (match_operand:V4SI 0 "register_operand" "=v") + [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v") (any_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "vm") + (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))] "TARGET_SSE4_1 && <mask_avx512vl_condition>" @@ -14313,10 +14337,10 @@ (set_attr "mode" "OI")]) (define_insn "sse4_1_<code>v2qiv2di2<mask_name>" - [(set (match_operand:V2DI 0 "register_operand" "=v") + [(set 
(match_operand:V2DI 0 "register_operand" "=Yr,*v") (any_extend:V2DI (vec_select:V2QI - (match_operand:V16QI 1 "nonimmediate_operand" "vm") + (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1 && <mask_avx512vl_condition>" "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}" @@ -14351,10 +14375,10 @@ (set_attr "mode" "OI")]) (define_insn "sse4_1_<code>v2hiv2di2<mask_name>" - [(set (match_operand:V2DI 0 "register_operand" "=v") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v") (any_extend:V2DI (vec_select:V2HI - (match_operand:V8HI 1 "nonimmediate_operand" "vm") + (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1 && <mask_avx512vl_condition>" "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}" @@ -14386,10 +14410,10 @@ (set_attr "mode" "OI")]) (define_insn "sse4_1_<code>v2siv2di2<mask_name>" - [(set (match_operand:V2DI 0 "register_operand" "=v") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v") (any_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "vm") + (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1 && <mask_avx512vl_condition>" "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" @@ -14430,8 +14454,8 @@ (define_insn "sse4_1_ptest" [(set (reg:CC FLAGS_REG) - (unspec:CC [(match_operand:V2DI 0 "register_operand" "x") - (match_operand:V2DI 1 "nonimmediate_operand" "xm")] + (unspec:CC [(match_operand:V2DI 0 "register_operand" "Yr,*x") + (match_operand:V2DI 1 "nonimmediate_operand" "Yrm,*xm")] UNSPEC_PTEST))] "TARGET_SSE4_1" "%vptest\t{%1, %0|%0, %1}" @@ -14441,10 +14465,10 @@ (set_attr "mode" "TI")]) (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>" - [(set (match_operand:VF_128_256 0 "register_operand" "=x") + [(set (match_operand:VF_128_256 0 "register_operand" 
"=Yr,*x") (unspec:VF_128_256 - [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm") - (match_operand:SI 2 "const_0_to_15_operand" "n")] + [(match_operand:VF_128_256 1 "nonimmediate_operand" "Yrm,*xm") + (match_operand:SI 2 "const_0_to_15_operand" "n,n")] UNSPEC_ROUND))] "TARGET_ROUND" "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" @@ -14524,24 +14548,25 @@ }) (define_insn "sse4_1_round<ssescalarmodesuffix>" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x") (vec_merge:VF_128 (unspec:VF_128 - [(match_operand:VF_128 2 "register_operand" "x,x") - (match_operand:SI 3 "const_0_to_15_operand" "n,n")] + [(match_operand:VF_128 2 "register_operand" "Yr,*x,x") + (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")] UNSPEC_ROUND) - (match_operand:VF_128 1 "register_operand" "0,x") + (match_operand:VF_128 1 "register_operand" "0,0,x") (const_int 1)))] "TARGET_ROUND" "@ round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} + round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssecvt") (set_attr "length_immediate" "1") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "<MODE>")]) (define_expand "round<mode>2" diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md index 91228c8..d4ce519 100644 --- a/gcc/config/i386/subst.md +++ b/gcc/config/i386/subst.md @@ -63,6 +63,7 @@ (define_subst_attr "mask_prefix" "mask" "vex" "evex") (define_subst_attr "mask_prefix2" "mask" "maybe_vex" "evex") (define_subst_attr "mask_prefix3" "mask" "orig,vex" "evex") +(define_subst_attr "mask_prefix4" "mask" "orig,orig,vex" "evex") (define_subst_attr "mask_expand_op3" "mask" "3" "5") (define_subst "mask" diff --git 
a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 735e6e5..b5c6e4f 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -395,6 +395,10 @@ DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb", DEF_TUNE (X86_TUNE_VECTOR_PARALLEL_EXECUTION, "vec_parallel", m_NEHALEM | m_SANDYBRIDGE | m_HASWELL) +/* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes. */ +DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", + m_SILVERMONT | m_INTEL) + /*****************************************************************************/ /* AVX instruction selection tuning (some of SSE flags affects AVX, too) */ /*****************************************************************************/ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ee812d2..b0d1786 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2014-12-03 Ilya Enkovich <ilya.enkovich@intel.com> + + * gcc.target/i386/sse2-init-v2di-2.c: Adjust to changed + vec_concatv2di template. + 2014-12-03 Segher Boessenkool <segher.kernel.crashing.org> PR rtl-optimization/52714 diff --git a/gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c b/gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c index 0aa5264..b347a4a 100644 --- a/gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c +++ b/gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c @@ -10,4 +10,4 @@ test (long long b) return _mm_cvtsi64_si128 (b); } -/* { dg-final { scan-assembler-times "vec_concatv2di/3" 1 } } */ +/* { dg-final { scan-assembler-times "vec_concatv2di/4" 1 } } */ |