diff options
Diffstat (limited to 'gcc/config/i386/sse.md')
-rw-r--r-- | gcc/config/i386/sse.md | 2532 |
1 files changed, 2283 insertions, 249 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index e9f6c3d..5bc8586 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -81,9 +81,104 @@ (define_mode_iterator VI8 [(V4DI "TARGET_AVX") V2DI]) +(define_mode_iterator VI1_AVX2 + [(V32QI "TARGET_AVX2") V16QI]) + +(define_mode_iterator VI2_AVX2 + [(V16HI "TARGET_AVX2") V8HI]) + +(define_mode_iterator VI4_AVX2 + [(V8SI "TARGET_AVX2") V4SI]) + +(define_mode_iterator VI8_AVX2 + [(V4DI "TARGET_AVX2") V2DI]) + +(define_mode_iterator VIMAX_AVX2 + [(V4DI "TARGET_AVX2") V1TI]) + +(define_mode_iterator SSESCALARMODE + [(V4DI "TARGET_AVX2") TI]) + +(define_mode_iterator VI12_AVX2 + [(V32QI "TARGET_AVX2") V16QI + (V16HI "TARGET_AVX2") V8HI]) + +(define_mode_iterator VI24_AVX2 + [(V16HI "TARGET_AVX2") V8HI + (V8SI "TARGET_AVX2") V4SI]) + +(define_mode_iterator VI124_AVX2 + [(V32QI "TARGET_AVX2") V16QI + (V16HI "TARGET_AVX2") V8HI + (V8SI "TARGET_AVX2") V4SI]) + +(define_mode_iterator VI248_AVX2 + [(V16HI "TARGET_AVX2") V8HI + (V8SI "TARGET_AVX2") V4SI + (V4DI "TARGET_AVX2") V2DI]) + +(define_mode_iterator VI48_AVX2 + [V8SI V4SI V4DI V2DI]) + +(define_mode_iterator VI4SD_AVX2 + [V4SI V4DI]) + +(define_mode_iterator V48_AVX2 + [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE2") + (V8SF "TARGET_AVX") (V4DF "TARGET_AVX") + (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2") + (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")]) + +(define_mode_attr sse2_avx2 + [(V16QI "sse2") (V32QI "avx2") + (V8HI "sse2") (V16HI "avx2") + (V4SI "sse2") (V8SI "avx2") + (V2DI "sse2") (V4DI "avx2") + (V1TI "sse2")]) + +(define_mode_attr ssse3_avx2 + [(V16QI "ssse3") (V32QI "avx2") + (V8HI "ssse3") (V16HI "avx2") + (V4SI "ssse3") (V8SI "avx2") + (V2DI "ssse3") (V4DI "avx2") + (TI "ssse3")]) + +(define_mode_attr sse4_1_avx2 + [(V16QI "sse4_1") (V32QI "avx2") + (V8HI "sse4_1") (V16HI "avx2") + (V4SI "sse4_1") (V8SI "avx2") + (V2DI "sse4_1") (V4DI "avx2")]) + +(define_mode_attr avx_avx2 + [(V4SF "avx") (V2DF "avx") + (V8SF "avx") (V4DF "avx") + (V4SI 
"avx2") (V2DI "avx2") + (V8SI "avx2") (V4DI "avx2")]) + +;; Mapping of logic-shift operators +(define_code_iterator lshift [lshiftrt ashift]) + +;; Base name for insn mnemonic +(define_code_attr lshift_insn [(lshiftrt "srl") (ashift "sll")]) + +;; Base name for define_insn +(define_code_attr lshift [(lshiftrt "lshr") (ashift "lshl")]) + +(define_mode_attr ssedoublemode + [(V16HI "V16SI") (V8HI "V8SI")]) + +(define_mode_attr ssebytemode + [(V4DI "V32QI") (V2DI "V16QI")]) + +(define_mode_attr shortmode + [(V4DI "v4si") (V2DI "v2si")]) + ;; All 128bit vector integer modes (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI]) +;; All 256bit vector integer modes +(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI]) + ;; Random 128bit vector integer mode combinations (define_mode_iterator VI12_128 [V16QI V8HI]) (define_mode_iterator VI14_128 [V16QI V4SI]) @@ -91,6 +186,11 @@ (define_mode_iterator VI24_128 [V8HI V4SI]) (define_mode_iterator VI248_128 [V8HI V4SI V2DI]) +;; Random 256bit vector integer mode combinations +(define_mode_iterator VI124_256 [V32QI V16HI V8SI]) +(define_mode_iterator VI1248_256 [V32QI V16HI V8SI V4DI]) +(define_mode_iterator VI248_256 [V16HI V8SI V4DI]) + ;; Int-float size matches (define_mode_iterator VI4F_128 [V4SI V4SF]) (define_mode_iterator VI8F_128 [V2DI V2DF]) @@ -125,12 +225,16 @@ [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI") (V8SF "V8SF") (V4DF "V4DF") - (V4SF "V4SF") (V2DF "V2DF")]) + (V4SF "V4SF") (V2DF "V2DF") + (TI "TI") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")]) ;; Mapping of vector float modes to an integer mode of the same size (define_mode_attr sseintvecmode [(V8SF "V8SI") (V4DF "V4DI") - (V4SF "V4SI") (V2DF "V2DI")]) + (V4SF "V4SI") (V2DF "V2DI") + (V4DF "V4DI") (V8SF "V8SI") + (V8SI "V8SI") (V4DI "V4DI") + (V4SI "V4SI") (V2DI "V2DI")]) ;; Mapping of vector modes to a vector mode of double size (define_mode_attr ssedoublevecmode @@ -162,17 +266,20 @@ ;; 
SSE scalar suffix for vector modes (define_mode_attr ssescalarmodesuffix - [(V8SF "ss") (V4DF "sd") + [(SF "ss") (DF "sd") + (V8SF "ss") (V4DF "sd") (V4SF "ss") (V2DF "sd") (V8SI "ss") (V4DI "sd") (V4SI "d")]) ;; Pack/unpack vector modes (define_mode_attr sseunpackmode - [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")]) + [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI") + (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")]) (define_mode_attr ssepackmode - [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")]) + [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI") + (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")]) ;; Mapping of the max integer size for xop rotate immediate constraint (define_mode_attr sserotatemax @@ -184,11 +291,27 @@ ;; Instruction suffix for sign and zero extensions. (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")]) - - ;; Mix-n-match (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) +(define_mode_iterator AVXMODE48P_DI + [V2DI V2DF V4DI V4DF V4SF V4SI]) +(define_mode_attr AVXMODE48P_DI + [(V2DI "V2DI") (V2DF "V2DI") + (V4DI "V4DI") (V4DF "V4DI") + (V4SI "V2DI") (V4SF "V2DI") + (V8SI "V4DI") (V8SF "V4DI")]) +(define_mode_attr gthrfirstp + [(V2DI "p") (V2DF "") + (V4DI "p") (V4DF "") + (V4SI "p") (V4SF "") + (V8SI "p") (V8SF "")]) +(define_mode_attr gthrlastp + [(V2DI "q") (V2DF "pd") + (V4DI "q") (V4DF "pd") + (V4SI "d") (V4SF "ps") + (V8SI "d") (V8SF "ps")]) + (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) ;; Mapping of immediate bits for blend instructions @@ -229,7 +352,7 @@ case 1: case 2: switch (get_attr_mode (insn)) - { + { case MODE_V8SF: case MODE_V4SF: if (TARGET_AVX @@ -272,10 +395,10 @@ (set_attr "prefix" "maybe_vex") (set (attr "mode") (cond [(ne (symbol_ref "TARGET_AVX") (const_int 0)) - (const_string "<sseinsnmode>") + (const_string "<sseinsnmode>") (ior (ior - (ne (symbol_ref "optimize_function_for_size_p (cfun)") - (const_int 0)) + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) (eq (symbol_ref 
"TARGET_SSE2") (const_int 0))) (and (eq_attr "alternative" "2") (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") @@ -325,15 +448,15 @@ /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax). Assemble the 64-bit DImode value in an xmm register. */ emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode), - gen_rtx_SUBREG (SImode, operands[1], 0))); + gen_rtx_SUBREG (SImode, operands[1], 0))); emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), gen_rtx_SUBREG (SImode, operands[1], 4))); emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0], - operands[2])); + operands[2])); } else if (memory_operand (operands[1], DImode)) emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), - operands[1], const0_rtx)); + operands[1], const0_rtx)); else gcc_unreachable (); }) @@ -1281,12 +1404,12 @@ (define_expand "vcond<mode>" [(set (match_operand:VF 0 "register_operand" "") - (if_then_else:VF - (match_operator 3 "" - [(match_operand:VF 4 "nonimmediate_operand" "") - (match_operand:VF 5 "nonimmediate_operand" "")]) - (match_operand:VF 1 "general_operand" "") - (match_operand:VF 2 "general_operand" "")))] + (if_then_else:VF + (match_operator 3 "" + [(match_operand:VF 4 "nonimmediate_operand" "") + (match_operand:VF 5 "nonimmediate_operand" "")]) + (match_operand:VF 1 "general_operand" "") + (match_operand:VF 2 "general_operand" "")))] "TARGET_SSE" { bool ok = ix86_expand_fp_vcond (operands); @@ -2579,7 +2702,7 @@ (parallel [(const_int 2) (const_int 3) (const_int 2) (const_int 3)]))) (set (match_operand:V2DF 0 "register_operand" "") - (float:V2DF + (float:V2DF (vec_select:V2SI (match_dup 2) (parallel [(const_int 0) (const_int 1)]))))] @@ -2601,7 +2724,7 @@ (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))) (set (match_operand:V4DF 0 "register_operand" "") - (float:V4DF + (float:V4DF (match_dup 2)))] "TARGET_AVX" "operands[2] = gen_reg_rtx (V4SImode);") @@ -2622,7 +2745,7 @@ (parallel [(const_int 2) (const_int 
3) (const_int 2) (const_int 3)]))) (set (match_dup 6) - (float:V2DF + (float:V2DF (vec_select:V2SI (match_dup 5) (parallel [(const_int 0) (const_int 1)])))) @@ -2728,8 +2851,8 @@ emit_insn (gen_sse2_cvttpd2dq (r1, operands[1])); emit_insn (gen_sse2_cvttpd2dq (r2, operands[2])); emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), - gen_lowpart (V2DImode, r1), - gen_lowpart (V2DImode, r2))); + gen_lowpart (V2DImode, r1), + gen_lowpart (V2DImode, r2))); DONE; }) @@ -2747,8 +2870,8 @@ emit_insn (gen_sse2_cvtpd2dq (r1, operands[1])); emit_insn (gen_sse2_cvtpd2dq (r2, operands[2])); emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), - gen_lowpart (V2DImode, r1), - gen_lowpart (V2DImode, r2))); + gen_lowpart (V2DImode, r1), + gen_lowpart (V2DImode, r2))); DONE; }) @@ -3290,6 +3413,18 @@ operands[1] = force_reg (SFmode, operands[1]); }) +(define_insn "avx2_vec_dupv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_duplicate:V4SF + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] + "TARGET_AVX2" + "vbroadcastss\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog1") + (set_attr "prefix" "vex") + (set_attr "mode" "V4SF")]) + (define_insn "*vec_dupv4sf_avx" [(set (match_operand:V4SF 0 "register_operand" "=x,x") (vec_duplicate:V4SF @@ -3304,6 +3439,18 @@ (set_attr "prefix" "vex") (set_attr "mode" "V4SF")]) +(define_insn "avx2_vec_dupv8sf" + [(set (match_operand:V8SF 0 "register_operand" "=x") + (vec_duplicate:V8SF + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] + "TARGET_AVX2" + "vbroadcastss\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog1") + (set_attr "prefix" "vex") + (set_attr "mode" "V8SF")]) + (define_insn "*vec_dupv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_duplicate:V4SF @@ -3899,7 +4046,7 @@ (match_dup 3) (match_dup 4)) (parallel [(const_int 0) (const_int 1) - (const_int 4) (const_int 5)])))] + 
(const_int 4) (const_int 5)])))] "TARGET_AVX" { operands[3] = gen_reg_rtx (V4DFmode); @@ -4059,6 +4206,21 @@ }) ;; punpcklqdq and punpckhqdq are shorter than shufpd. +(define_insn "avx2_interleave_highv4di" + [(set (match_operand:V4DI 0 "register_operand" "=x") + (vec_select:V4DI + (vec_concat:V8DI + (match_operand:V4DI 1 "register_operand" "x") + (match_operand:V4DI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 1) + (const_int 5) + (const_int 3) + (const_int 7)])))] + "TARGET_AVX2" + "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) (define_insn "vec_interleave_highv2di" [(set (match_operand:V2DI 0 "register_operand" "=x,x") @@ -4078,6 +4240,22 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) +(define_insn "avx2_interleave_lowv4di" + [(set (match_operand:V4DI 0 "register_operand" "=x") + (vec_select:V4DI + (vec_concat:V8DI + (match_operand:V4DI 1 "register_operand" "x") + (match_operand:V4DI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) + (const_int 4) + (const_int 2) + (const_int 6)])))] + "TARGET_AVX2" + "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "vec_interleave_lowv2di" [(set (match_operand:V2DI 0 "register_operand" "=x,x") (vec_select:V2DI @@ -4463,18 +4641,18 @@ "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));") (define_expand "<plusminus_insn><mode>3" - [(set (match_operand:VI_128 0 "register_operand" "") - (plusminus:VI_128 - (match_operand:VI_128 1 "nonimmediate_operand" "") - (match_operand:VI_128 2 "nonimmediate_operand" "")))] + [(set (match_operand:VI 0 "register_operand" "") + (plusminus:VI + (match_operand:VI 1 "nonimmediate_operand" "") + (match_operand:VI 2 "nonimmediate_operand" "")))] "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*<plusminus_insn><mode>3" - [(set 
(match_operand:VI_128 0 "register_operand" "=x,x") - (plusminus:VI_128 - (match_operand:VI_128 1 "nonimmediate_operand" "<comm>0,x") - (match_operand:VI_128 2 "nonimmediate_operand" "xm,xm")))] + [(set (match_operand:VI 0 "register_operand" "=x,x") + (plusminus:VI + (match_operand:VI 1 "nonimmediate_operand" "<comm>0,x") + (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))] "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} @@ -4483,21 +4661,21 @@ (set_attr "type" "sseiadd") (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) -(define_expand "sse2_<plusminus_insn><mode>3" - [(set (match_operand:VI12_128 0 "register_operand" "") - (sat_plusminus:VI12_128 - (match_operand:VI12_128 1 "nonimmediate_operand" "") - (match_operand:VI12_128 2 "nonimmediate_operand" "")))] +(define_expand "<sse2_avx2>_<plusminus_insn><mode>3" + [(set (match_operand:VI12_AVX2 0 "register_operand" "") + (sat_plusminus:VI12_AVX2 + (match_operand:VI12_AVX2 1 "nonimmediate_operand" "") + (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))] "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") -(define_insn "*sse2_<plusminus_insn><mode>3" - [(set (match_operand:VI12_128 0 "register_operand" "=x,x") - (sat_plusminus:VI12_128 - (match_operand:VI12_128 1 "nonimmediate_operand" "<comm>0,x") - (match_operand:VI12_128 2 "nonimmediate_operand" "xm,xm")))] +(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3" + [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x") + (sat_plusminus:VI12_AVX2 + (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x") + (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))] "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} @@ -4548,18 +4726,18 @@ DONE; }) -(define_expand "mulv8hi3" - [(set 
(match_operand:V8HI 0 "register_operand" "") - (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") - (match_operand:V8HI 2 "nonimmediate_operand" "")))] +(define_expand "mul<mode>3" + [(set (match_operand:VI2_AVX2 0 "register_operand" "") + (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "") + (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))] "TARGET_SSE2" - "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") + "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") -(define_insn "*mulv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0,x") - (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))] - "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" +(define_insn "*mul<mode>3" + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") + (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x") + (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" "@ pmullw\t{%2, %0|%0, %2} vpmullw\t{%2, %1, %0|%0, %1, %2}" @@ -4567,32 +4745,32 @@ (set_attr "type" "sseimul") (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) -(define_expand "<s>mulv8hi3_highpart" - [(set (match_operand:V8HI 0 "register_operand" "") - (truncate:V8HI - (lshiftrt:V8SI - (mult:V8SI - (any_extend:V8SI - (match_operand:V8HI 1 "nonimmediate_operand" "")) - (any_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand" ""))) - (const_int 16))))] +(define_expand "<s>mul<mode>3_highpart" + [(set (match_operand:VI2_AVX2 0 "register_operand" "") + (truncate:VI2_AVX2 + (lshiftrt:<ssedoublemode> + (mult:<ssedoublemode> + (any_extend:<ssedoublemode> + (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")) + (any_extend:<ssedoublemode> + (match_operand:VI2_AVX2 2 "nonimmediate_operand" ""))) + 
(const_int 16))))] "TARGET_SSE2" - "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") + "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") -(define_insn "*<s>mulv8hi3_highpart" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (truncate:V8HI - (lshiftrt:V8SI - (mult:V8SI - (any_extend:V8SI - (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")) - (any_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))) +(define_insn "*<s>mul<mode>3_highpart" + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") + (truncate:VI2_AVX2 + (lshiftrt:<ssedoublemode> + (mult:<ssedoublemode> + (any_extend:<ssedoublemode> + (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")) + (any_extend:<ssedoublemode> + (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm"))) (const_int 16))))] - "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" + "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" "@ pmulh<u>w\t{%2, %0|%0, %2} vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}" @@ -4600,7 +4778,42 @@ (set_attr "type" "sseimul") (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) + +(define_expand "avx2_umulv4siv4di3" + [(set (match_operand:V4DI 0 "register_operand" "") + (mult:V4DI + (zero_extend:V4DI + (vec_select:V4SI + (match_operand:V8SI 1 "nonimmediate_operand" "") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))) + (zero_extend:V4DI + (vec_select:V4SI + (match_operand:V8SI 2 "nonimmediate_operand" "") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))))] + "TARGET_AVX2" + "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);") + +(define_insn "*avx2_umulv4siv4di3" + [(set (match_operand:V4DI 0 "register_operand" "=x") + (mult:V4DI + (zero_extend:V4DI + (vec_select:V4SI + (match_operand:V8SI 1 "nonimmediate_operand" "%x") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))) + (zero_extend:V4DI + (vec_select:V4SI + 
(match_operand:V8SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))))] + "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)" + "vpmuludq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseimul") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) (define_expand "sse2_umulv2siv2di3" [(set (match_operand:V2DI 0 "register_operand" "") @@ -4637,6 +4850,43 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) +(define_expand "avx2_mulv4siv4di3" + [(set (match_operand:V4DI 0 "register_operand" "") + (mult:V4DI + (sign_extend:V4DI + (vec_select:V4SI + (match_operand:V8SI 1 "nonimmediate_operand" "") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))) + (sign_extend:V4DI + (vec_select:V4SI + (match_operand:V8SI 2 "nonimmediate_operand" "") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))))] + "TARGET_AVX2" + "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);") + +(define_insn "*avx2_mulv4siv4di3" + [(set (match_operand:V4DI 0 "register_operand" "=x") + (mult:V4DI + (sign_extend:V4DI + (vec_select:V4SI + (match_operand:V8SI 1 "nonimmediate_operand" "%x") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))) + (sign_extend:V4DI + (vec_select:V4SI + (match_operand:V8SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))))] + "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)" + "vpmuldq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "avx") + (set_attr "type" "sseimul") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_expand "sse4_1_mulv2siv2di3" [(set (match_operand:V2DI 0 "register_operand" "") (mult:V2DI @@ -4673,6 +4923,56 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) +(define_expand "avx2_pmaddwd" + [(set (match_operand:V8SI 0 "register_operand" "") + (plus:V8SI + (mult:V8SI + 
(sign_extend:V8SI + (vec_select:V8HI + (match_operand:V16HI 1 "nonimmediate_operand" "") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (sign_extend:V8SI + (vec_select:V8HI + (match_operand:V16HI 2 "nonimmediate_operand" "") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)])))) + (mult:V8SI + (sign_extend:V8SI + (vec_select:V8HI (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)]))) + (sign_extend:V8SI + (vec_select:V8HI (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)]))))))] + "TARGET_AVX2" + "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);") + (define_expand "sse2_pmaddwd" [(set (match_operand:V4SI 0 "register_operand" "") (plus:V4SI @@ -4707,6 +5007,59 @@ "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") +(define_insn "*avx2_pmaddwd" + [(set (match_operand:V8SI 0 "register_operand" "=x") + (plus:V8SI + (mult:V8SI + (sign_extend:V8SI + (vec_select:V8HI + (match_operand:V16HI 1 "nonimmediate_operand" "%x") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (sign_extend:V8SI + (vec_select:V8HI + (match_operand:V16HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)])))) + (mult:V8SI + (sign_extend:V8SI + (vec_select:V8HI (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)]))) + (sign_extend:V8SI + 
(vec_select:V8HI (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)]))))))] + "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)" + "vpmaddwd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "*sse2_pmaddwd" [(set (match_operand:V4SI 0 "register_operand" "=x,x") (plus:V4SI @@ -4749,21 +5102,21 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_expand "mulv4si3" - [(set (match_operand:V4SI 0 "register_operand" "") - (mult:V4SI (match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "register_operand" "")))] +(define_expand "mul<mode>3" + [(set (match_operand:VI4_AVX2 0 "register_operand" "") + (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "") + (match_operand:VI4_AVX2 2 "register_operand" "")))] "TARGET_SSE2" { if (TARGET_SSE4_1 || TARGET_AVX) - ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands); + ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands); }) -(define_insn "*sse4_1_mulv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") - (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0,x") - (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")))] - "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)" +(define_insn "*<sse4_1_avx2>_mul<mode>3" + [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x") + (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x") + (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))] + "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" "@ pmulld\t{%2, %0|%0, %2} vpmulld\t{%2, %1, %0|%0, %1, %2}" @@ -4771,7 +5124,7 @@ (set_attr "type" "sseimul") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) (define_insn_and_split 
"*sse2_mulv4si3" [(set (match_operand:V4SI 0 "register_operand" "") @@ -4885,7 +5238,7 @@ /* Multiply low parts. */ emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1), - gen_lowpart (V4SImode, op2))); + gen_lowpart (V4SImode, op2))); /* Shift input vectors left 32 bits so we can multiply high parts. */ emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo)); @@ -5119,9 +5472,9 @@ }) (define_insn "ashr<mode>3" - [(set (match_operand:VI24_128 0 "register_operand" "=x,x") - (ashiftrt:VI24_128 - (match_operand:VI24_128 1 "register_operand" "0,x") + [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x") + (ashiftrt:VI24_AVX2 + (match_operand:VI24_AVX2 1 "register_operand" "0,x") (match_operand:SI 2 "nonmemory_operand" "xN,xN")))] "TARGET_SSE2" "@ @@ -5135,12 +5488,27 @@ (const_string "0"))) (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx2_lshrqv4di3" + [(set (match_operand:V4DI 0 "register_operand" "=x") + (lshiftrt:V4DI + (match_operand:V4DI 1 "register_operand" "x") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_AVX2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix" "vex") + (set_attr "length_immediate" "1") + (set_attr "mode" "OI")]) (define_insn "lshr<mode>3" - [(set (match_operand:VI248_128 0 "register_operand" "=x,x") - (lshiftrt:VI248_128 - (match_operand:VI248_128 1 "register_operand" "0,x") + [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x") + (lshiftrt:VI248_AVX2 + (match_operand:VI248_AVX2 1 "register_operand" "0,x") (match_operand:SI 2 "nonmemory_operand" "xN,xN")))] "TARGET_SSE2" "@ @@ -5154,7 +5522,36 @@ (const_string "0"))) (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx2_lshlqv4di3" + [(set 
(match_operand:V4DI 0 "register_operand" "=x") + (ashift:V4DI (match_operand:V4DI 1 "register_operand" "x") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_AVX2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "vpslldq\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix" "vex") + (set_attr "length_immediate" "1") + (set_attr "mode" "OI")]) + +(define_insn "avx2_lshl<mode>3" + [(set (match_operand:VI248_256 0 "register_operand" "=x") + (ashift:VI248_256 + (match_operand:VI248_256 1 "register_operand" "x") + (match_operand:SI 2 "nonmemory_operand" "xN")))] + "TARGET_AVX2" + "vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseishft") + (set_attr "prefix" "vex") + (set (attr "length_immediate") + (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "1") + (const_string "0"))) + (set_attr "mode" "OI")]) (define_insn "ashl<mode>3" [(set (match_operand:VI248_128 0 "register_operand" "=x,x") @@ -5177,7 +5574,7 @@ (define_expand "vec_shl_<mode>" [(set (match_operand:VI_128 0 "register_operand" "") - (ashift:V1TI + (ashift:V1TI (match_operand:VI_128 1 "register_operand" "") (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))] "TARGET_SSE2" @@ -5186,10 +5583,10 @@ operands[1] = gen_lowpart (V1TImode, operands[1]); }) -(define_insn "sse2_ashlv1ti3" - [(set (match_operand:V1TI 0 "register_operand" "=x,x") - (ashift:V1TI - (match_operand:V1TI 1 "register_operand" "0,x") +(define_insn "<sse2_avx2>_ashl<mode>3" + [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x") + (ashift:VIMAX_AVX2 + (match_operand:VIMAX_AVX2 1 "register_operand" "0,x") (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] "TARGET_SSE2" { @@ -5210,11 +5607,11 @@ (set_attr "length_immediate" "1") (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) (define_expand "vec_shr_<mode>" [(set (match_operand:VI_128 0 
"register_operand" "") - (lshiftrt:V1TI + (lshiftrt:V1TI (match_operand:VI_128 1 "register_operand" "") (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))] "TARGET_SSE2" @@ -5223,9 +5620,29 @@ operands[1] = gen_lowpart (V1TImode, operands[1]); }) +(define_expand "avx2_<code><mode>3" + [(set (match_operand:VI124_256 0 "register_operand" "") + (umaxmin:VI124_256 + (match_operand:VI124_256 1 "nonimmediate_operand" "") + (match_operand:VI124_256 2 "nonimmediate_operand" "")))] + "TARGET_AVX2" + "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") + +(define_insn "*avx2_<code><mode>3" + [(set (match_operand:VI124_256 0 "register_operand" "=x") + (umaxmin:VI124_256 + (match_operand:VI124_256 1 "nonimmediate_operand" "%x") + (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))] + "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" + "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "sse2_lshrv1ti3" [(set (match_operand:V1TI 0 "register_operand" "=x,x") - (lshiftrt:V1TI + (lshiftrt:V1TI (match_operand:V1TI 1 "register_operand" "0,x") (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] "TARGET_SSE2" @@ -5250,6 +5667,26 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) +(define_expand "avx2_<code><mode>3" + [(set (match_operand:VI124_256 0 "register_operand" "") + (smaxmin:VI124_256 + (match_operand:VI124_256 1 "nonimmediate_operand" "") + (match_operand:VI124_256 2 "nonimmediate_operand" "")))] + "TARGET_AVX2" + "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") + +(define_insn "*avx2_<code><mode>3" + [(set (match_operand:VI124_256 0 "register_operand" "=x") + (smaxmin:VI124_256 + (match_operand:VI124_256 1 "nonimmediate_operand" "%x") + (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))] + "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, 
<MODE>mode, operands)" + "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "*sse4_1_<code><mode>3" [(set (match_operand:VI14_128 0 "register_operand" "=x,x") (smaxmin:VI14_128 @@ -5533,6 +5970,26 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_expand "avx2_eq<mode>3" + [(set (match_operand:VI1248_256 0 "register_operand" "") + (eq:VI1248_256 + (match_operand:VI1248_256 1 "nonimmediate_operand" "") + (match_operand:VI1248_256 2 "nonimmediate_operand" "")))] + "TARGET_AVX2" + "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") + +(define_insn "*avx2_eq<mode>3" + [(set (match_operand:VI1248_256 0 "register_operand" "=x") + (eq:VI1248_256 + (match_operand:VI1248_256 1 "nonimmediate_operand" "%x") + (match_operand:VI1248_256 2 "nonimmediate_operand" "xm")))] + "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" + "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecmp") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "*sse4_1_eqv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x,x") (eq:V2DI @@ -5595,6 +6052,18 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) +(define_insn "avx2_gt<mode>3" + [(set (match_operand:VI1248_256 0 "register_operand" "=x") + (gt:VI1248_256 + (match_operand:VI1248_256 1 "register_operand" "x") + (match_operand:VI1248_256 2 "nonimmediate_operand" "xm")))] + "TARGET_AVX2" + "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecmp") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "sse2_gt<mode>3" [(set (match_operand:VI124_128 0 "register_operand" "=x,x") (gt:VI124_128 @@ -5612,12 +6081,12 @@ (define_expand "vcond<mode>" [(set (match_operand:VI124_128 0 "register_operand" "") - 
(if_then_else:VI124_128 - (match_operator 3 "" - [(match_operand:VI124_128 4 "nonimmediate_operand" "") - (match_operand:VI124_128 5 "nonimmediate_operand" "")]) - (match_operand:VI124_128 1 "general_operand" "") - (match_operand:VI124_128 2 "general_operand" "")))] + (if_then_else:VI124_128 + (match_operator 3 "" + [(match_operand:VI124_128 4 "nonimmediate_operand" "") + (match_operand:VI124_128 5 "nonimmediate_operand" "")]) + (match_operand:VI124_128 1 "general_operand" "") + (match_operand:VI124_128 2 "general_operand" "")))] "TARGET_SSE2" { bool ok = ix86_expand_int_vcond (operands); @@ -5627,12 +6096,12 @@ (define_expand "vcondv2di" [(set (match_operand:V2DI 0 "register_operand" "") - (if_then_else:V2DI - (match_operator 3 "" - [(match_operand:V2DI 4 "nonimmediate_operand" "") - (match_operand:V2DI 5 "nonimmediate_operand" "")]) - (match_operand:V2DI 1 "general_operand" "") - (match_operand:V2DI 2 "general_operand" "")))] + (if_then_else:V2DI + (match_operator 3 "" + [(match_operand:V2DI 4 "nonimmediate_operand" "") + (match_operand:V2DI 5 "nonimmediate_operand" "")]) + (match_operand:V2DI 1 "general_operand" "") + (match_operand:V2DI 2 "general_operand" "")))] "TARGET_SSE4_2" { bool ok = ix86_expand_int_vcond (operands); @@ -5642,12 +6111,12 @@ (define_expand "vcondu<mode>" [(set (match_operand:VI124_128 0 "register_operand" "") - (if_then_else:VI124_128 - (match_operator 3 "" - [(match_operand:VI124_128 4 "nonimmediate_operand" "") - (match_operand:VI124_128 5 "nonimmediate_operand" "")]) - (match_operand:VI124_128 1 "general_operand" "") - (match_operand:VI124_128 2 "general_operand" "")))] + (if_then_else:VI124_128 + (match_operator 3 "" + [(match_operand:VI124_128 4 "nonimmediate_operand" "") + (match_operand:VI124_128 5 "nonimmediate_operand" "")]) + (match_operand:VI124_128 1 "general_operand" "") + (match_operand:VI124_128 2 "general_operand" "")))] "TARGET_SSE2" { bool ok = ix86_expand_int_vcond (operands); @@ -5657,12 +6126,12 @@ (define_expand 
"vconduv2di" [(set (match_operand:V2DI 0 "register_operand" "") - (if_then_else:V2DI - (match_operator 3 "" - [(match_operand:V2DI 4 "nonimmediate_operand" "") - (match_operand:V2DI 5 "nonimmediate_operand" "")]) - (match_operand:V2DI 1 "general_operand" "") - (match_operand:V2DI 2 "general_operand" "")))] + (if_then_else:V2DI + (match_operator 3 "" + [(match_operand:V2DI 4 "nonimmediate_operand" "") + (match_operand:V2DI 5 "nonimmediate_operand" "")]) + (match_operand:V2DI 1 "general_operand" "") + (match_operand:V2DI 2 "general_operand" "")))] "TARGET_SSE4_2" { bool ok = ix86_expand_int_vcond (operands); @@ -5691,11 +6160,11 @@ operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v)); }) -(define_expand "sse2_andnot<mode>3" - [(set (match_operand:VI_128 0 "register_operand" "") - (and:VI_128 - (not:VI_128 (match_operand:VI_128 1 "register_operand" "")) - (match_operand:VI_128 2 "nonimmediate_operand" "")))] +(define_expand "<sse2_avx2>_andnot<mode>3" + [(set (match_operand:VI 0 "register_operand" "") + (and:VI + (not:VI (match_operand:VI 1 "register_operand" "")) + (match_operand:VI 2 "nonimmediate_operand" "")))] "TARGET_SSE2") (define_insn "*andnot<mode>3" @@ -5708,7 +6177,8 @@ static char buf[32]; const char *ops; const char *tmp - = (get_attr_mode (insn) == MODE_TI) ? "pandn" : "andnps"; + = ((get_attr_mode (insn) == MODE_TI) || + (get_attr_mode (insn) == MODE_OI)) ? "pandn" : "andnps"; switch (which_alternative) { @@ -5739,6 +6209,8 @@ (const_string "V8SF") (ne (symbol_ref "TARGET_SSE2") (const_int 0)) (const_string "TI") + (ne (symbol_ref "TARGET_AVX2") (const_int 0)) + (const_string "OI") ] (const_string "V4SF")))]) @@ -5761,7 +6233,8 @@ static char buf[32]; const char *ops; const char *tmp - = (get_attr_mode (insn) == MODE_TI) ? "p<logic>" : "<logic>ps"; + = (get_attr_mode (insn) == MODE_TI)|| + (get_attr_mode (insn) == MODE_OI) ? 
"p<logic>" : "<logic>ps"; switch (which_alternative) { @@ -5792,6 +6265,8 @@ (const_string "V8SF") (ne (symbol_ref "TARGET_SSE2") (const_int 0)) (const_string "TI") + (ne (symbol_ref "TARGET_AVX2") (const_int 0)) + (const_string "OI") ] (const_string "V4SF")))]) @@ -5852,13 +6327,13 @@ DONE; }) -(define_insn "sse2_packsswb" - [(set (match_operand:V16QI 0 "register_operand" "=x,x") - (vec_concat:V16QI - (ss_truncate:V8QI - (match_operand:V8HI 1 "register_operand" "0,x")) - (ss_truncate:V8QI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))] +(define_insn "<sse2_avx2>_packsswb" + [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") + (vec_concat:VI1_AVX2 + (ss_truncate:<ssehalfvecmode> + (match_operand:<sseunpackmode> 1 "register_operand" "0,x")) + (ss_truncate:<ssehalfvecmode> + (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))] "TARGET_SSE2" "@ packsswb\t{%2, %0|%0, %2} @@ -5867,15 +6342,15 @@ (set_attr "type" "sselog") (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) -(define_insn "sse2_packssdw" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (vec_concat:V8HI - (ss_truncate:V4HI - (match_operand:V4SI 1 "register_operand" "0,x")) - (ss_truncate:V4HI - (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))] +(define_insn "<sse2_avx2>_packssdw" + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") + (vec_concat:VI2_AVX2 + (ss_truncate:<ssehalfvecmode> + (match_operand:<sseunpackmode> 1 "register_operand" "0,x")) + (ss_truncate:<ssehalfvecmode> + (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))] "TARGET_SSE2" "@ packssdw\t{%2, %0|%0, %2} @@ -5884,15 +6359,15 @@ (set_attr "type" "sselog") (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) -(define_insn "sse2_packuswb" - [(set (match_operand:V16QI 0 "register_operand" "=x,x") - 
(vec_concat:V16QI - (us_truncate:V8QI - (match_operand:V8HI 1 "register_operand" "0,x")) - (us_truncate:V8QI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))] +(define_insn "<sse2_avx2>_packuswb" + [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") + (vec_concat:VI1_AVX2 + (us_truncate:<ssehalfvecmode> + (match_operand:<sseunpackmode> 1 "register_operand" "0,x")) + (us_truncate:<ssehalfvecmode> + (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))] "TARGET_SSE2" "@ packuswb\t{%2, %0|%0, %2} @@ -5901,7 +6376,36 @@ (set_attr "type" "sselog") (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) +
+;; Interleave the high 8 bytes of each 128-bit lane of operands 1 and 2: exactly 32 selectors (8-15/40-47 and 24-31/56-63) into the V64QI concat.
+(define_insn "avx2_interleave_highv32qi" + [(set (match_operand:V32QI 0 "register_operand" "=x") + (vec_select:V32QI + (vec_concat:V64QI + (match_operand:V32QI 1 "register_operand" "x") + (match_operand:V32QI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 8) (const_int 40) + (const_int 9) (const_int 41) + (const_int 10) (const_int 42) + (const_int 11) (const_int 43) + (const_int 12) (const_int 44) + (const_int 13) (const_int 45) + (const_int 14) (const_int 46) + (const_int 15) (const_int 47) + (const_int 24) (const_int 56) + (const_int 25) (const_int 57) + (const_int 26) (const_int 58) + (const_int 27) (const_int 59) + (const_int 28) (const_int 60) + (const_int 29) (const_int 61) + (const_int 30) (const_int 62) + (const_int 31) (const_int 63)])))] + "TARGET_AVX2" + "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) (define_insn "vec_interleave_highv16qi" [(set (match_operand:V16QI 0 "register_operand" "=x,x") @@ -5927,6 +6431,35 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")])
+;; Interleave the low 8 bytes of each 128-bit lane of operands 1 and 2: exactly 32 selectors (0-7/32-39 and 16-23/48-55) into the V64QI concat.
+(define_insn "avx2_interleave_lowv32qi" + [(set (match_operand:V32QI 0 "register_operand" "=x") + (vec_select:V32QI + (vec_concat:V64QI + (match_operand:V32QI 1 "register_operand" "x") + (match_operand:V32QI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 32) + (const_int 1) (const_int 33) + (const_int 2) (const_int 34) + (const_int 3) (const_int 35) + (const_int 4) (const_int 36) + (const_int 5) (const_int 37) + (const_int 6) (const_int 38) + (const_int 7) (const_int 39) + (const_int 16) (const_int 48) + (const_int 17) (const_int 49) + (const_int 18) (const_int 50) + (const_int 19) (const_int 51) + (const_int 20) (const_int 52) + (const_int 21) (const_int 53) + (const_int 22) (const_int 54) + (const_int 23) (const_int 55)])))] + "TARGET_AVX2" + "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "vec_interleave_lowv16qi" [(set (match_operand:V16QI 0 "register_operand" "=x,x") (vec_select:V16QI @@ -5951,6 +6484,26 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) +(define_insn "avx2_interleave_highv16hi" + [(set (match_operand:V16HI 0 "register_operand" "=x") + (vec_select:V16HI + (vec_concat:V32HI + (match_operand:V16HI 1 "register_operand" "x") + (match_operand:V16HI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 4) (const_int 20) + (const_int 5) (const_int 21) + (const_int 6) (const_int 22) + (const_int 7) (const_int 23) + (const_int 12) (const_int 28) + (const_int 13) (const_int 29) + (const_int 14) (const_int 30) + (const_int 15) (const_int 31)])))] + "TARGET_AVX2" + "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "vec_interleave_highv8hi" [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_select:V8HI @@ -5971,6 +6524,26 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) +(define_insn "avx2_interleave_lowv16hi" + [(set (match_operand:V16HI 0 "register_operand" "=x") + (vec_select:V16HI + (vec_concat:V32HI + (match_operand:V16HI 1 "register_operand" "x") +
(match_operand:V16HI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 16) + (const_int 1) (const_int 17) + (const_int 2) (const_int 18) + (const_int 3) (const_int 19) + (const_int 8) (const_int 24) + (const_int 9) (const_int 25) + (const_int 10) (const_int 26) + (const_int 11) (const_int 27)])))] + "TARGET_AVX2" + "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "vec_interleave_lowv8hi" [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_select:V8HI @@ -5991,6 +6564,22 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) +(define_insn "avx2_interleave_highv8si" + [(set (match_operand:V8SI 0 "register_operand" "=x") + (vec_select:V8SI + (vec_concat:V16SI + (match_operand:V8SI 1 "register_operand" "x") + (match_operand:V8SI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 2) (const_int 10) + (const_int 3) (const_int 11) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] + "TARGET_AVX2" + "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "vec_interleave_highv4si" [(set (match_operand:V4SI 0 "register_operand" "=x,x") (vec_select:V4SI @@ -6009,6 +6598,22 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) +(define_insn "avx2_interleave_lowv8si" + [(set (match_operand:V8SI 0 "register_operand" "=x") + (vec_select:V8SI + (vec_concat:V16SI + (match_operand:V8SI 1 "register_operand" "x") + (match_operand:V8SI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 4) (const_int 12) + (const_int 5) (const_int 13)])))] + "TARGET_AVX2" + "vpunpckldq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "vec_interleave_lowv4si" [(set (match_operand:V4SI 0 "register_operand" "=x,x") (vec_select:V4SI @@ -6055,13 
+6660,13 @@ { case 0: if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) - return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}"; + return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}"; /* FALLTHRU */ case 1: return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"; case 2: if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) - return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; + return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; /* FALLTHRU */ case 3: return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; @@ -6189,6 +6794,49 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) +(define_expand "avx2_pshufdv3" + [(match_operand:V8SI 0 "register_operand" "") + (match_operand:V8SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_0_to_255_operand" "")] + "TARGET_AVX2" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3))); + DONE; +}) + +(define_insn "avx2_pshufd_1" + [(set (match_operand:V8SI 0 "register_operand" "=x") + (vec_select:V8SI + (match_operand:V8SI 1 "nonimmediate_operand" "xm") + (parallel [(match_operand 2 "const_0_to_3_operand" "") + (match_operand 3 "const_0_to_3_operand" "") + (match_operand 4 "const_0_to_3_operand" "") + (match_operand 5 "const_0_to_3_operand" "") + (match_dup 2) + (match_dup 3) + (match_dup 4) + (match_dup 5)])))] + "TARGET_AVX2" +{ + int mask = 0; + mask |= INTVAL (operands[2]) << 0; + mask |= INTVAL (operands[3]) << 2; + mask |= INTVAL (operands[4]) << 4; + mask |= INTVAL (operands[5]) << 6; + operands[2] = GEN_INT (mask); + + return "vpshufd\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sselog1") + (set_attr "prefix" "vex") + (set_attr "length_immediate" "1") + (set_attr "mode" "OI")]) + (define_expand "sse2_pshufd" [(match_operand:V4SI 0 "register_operand" "") (match_operand:V4SI 
1 "nonimmediate_operand" "") @@ -6229,6 +6877,57 @@ (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) +(define_expand "avx2_pshuflwv3" + [(match_operand:V16HI 0 "register_operand" "") + (match_operand:V16HI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_0_to_255_operand" "")] + "TARGET_AVX2" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3))); + DONE; +}) + +(define_insn "avx2_pshuflw_1" + [(set (match_operand:V16HI 0 "register_operand" "=x") + (vec_select:V16HI + (match_operand:V16HI 1 "nonimmediate_operand" "xm") + (parallel [(match_operand 2 "const_0_to_3_operand" "") + (match_operand 3 "const_0_to_3_operand" "") + (match_operand 4 "const_0_to_3_operand" "") + (match_operand 5 "const_0_to_3_operand" "") + (const_int 4) + (const_int 5) + (const_int 6) + (const_int 7) + (match_dup 2) + (match_dup 3) + (match_dup 4) + (match_dup 5) + (const_int 12) + (const_int 13) + (const_int 14) + (const_int 15)])))] + "TARGET_AVX2" +{ + int mask = 0; + mask |= INTVAL (operands[2]) << 0; + mask |= INTVAL (operands[3]) << 2; + mask |= INTVAL (operands[4]) << 4; + mask |= INTVAL (operands[5]) << 6; + operands[2] = GEN_INT (mask); + + return "vpshuflw\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "length_immediate" "1") + (set_attr "mode" "OI")]) + (define_expand "sse2_pshuflw" [(match_operand:V8HI 0 "register_operand" "") (match_operand:V8HI 1 "nonimmediate_operand" "") @@ -6274,6 +6973,57 @@ (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) +(define_expand "avx2_pshufhwv3" + [(match_operand:V16HI 0 "register_operand" "") + (match_operand:V16HI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_0_to_255_operand" "")] + "TARGET_AVX2" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1], + 
GEN_INT (((mask >> 0) & 3) + 4), + GEN_INT (((mask >> 2) & 3) + 4), + GEN_INT (((mask >> 4) & 3) + 4), + GEN_INT (((mask >> 6) & 3) + 4))); + DONE; +}) + +(define_insn "avx2_pshufhw_1" + [(set (match_operand:V16HI 0 "register_operand" "=x") + (vec_select:V16HI + (match_operand:V16HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3) + (match_operand 2 "const_4_to_7_operand" "") + (match_operand 3 "const_4_to_7_operand" "") + (match_operand 4 "const_4_to_7_operand" "") + (match_operand 5 "const_4_to_7_operand" "") + (const_int 8) + (const_int 9) + (const_int 10) + (const_int 11) + (match_dup 2) + (match_dup 3) + (match_dup 4) + (match_dup 5)])))] + "TARGET_AVX2" +{ + int mask = 0; + mask |= (INTVAL (operands[2]) - 4) << 0; + mask |= (INTVAL (operands[3]) - 4) << 2; + mask |= (INTVAL (operands[4]) - 4) << 4; + mask |= (INTVAL (operands[5]) - 4) << 6; + operands[2] = GEN_INT (mask); + + return "vpshufhw\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "length_immediate" "1") + (set_attr "mode" "OI")]) + (define_expand "sse2_pshufhw" [(match_operand:V8HI 0 "register_operand" "") (match_operand:V8HI 1 "nonimmediate_operand" "") @@ -6665,6 +7415,36 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_expand "avx2_uavgv32qi3" + [(set (match_operand:V32QI 0 "register_operand" "") + (truncate:V32QI + (lshiftrt:V32HI + (plus:V32HI + (plus:V32HI + (zero_extend:V32HI + (match_operand:V32QI 1 "nonimmediate_operand" "")) + (zero_extend:V32HI + (match_operand:V32QI 2 "nonimmediate_operand" ""))) + (const_vector:V32QI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + 
(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_AVX2" + "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);") + (define_expand "sse2_uavgv16qi3" [(set (match_operand:V16QI 0 "register_operand" "") (truncate:V16QI @@ -6687,6 +7467,39 @@ "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);") +(define_insn "*avx2_uavgv32qi3" + [(set (match_operand:V32QI 0 "register_operand" "=x") + (truncate:V32QI + (lshiftrt:V32HI + (plus:V32HI + (plus:V32HI + (zero_extend:V32HI + (match_operand:V32QI 1 "nonimmediate_operand" "%x")) + (zero_extend:V32HI + (match_operand:V32QI 2 "nonimmediate_operand" "xm"))) + (const_vector:V32QI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)" + "vpavgb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "*sse2_uavgv16qi3" [(set (match_operand:V16QI 0 "register_operand" "=x,x") (truncate:V16QI @@ -6716,6 +7529,28 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) +(define_expand "avx2_uavgv16hi3" + [(set (match_operand:V16HI 0 "register_operand" "") + (truncate:V16HI + (lshiftrt:V16SI + (plus:V16SI + (plus:V16SI + (zero_extend:V16SI + (match_operand:V16HI 1 "nonimmediate_operand" "")) + (zero_extend:V16SI + (match_operand:V16HI 2 "nonimmediate_operand" ""))) + (const_vector:V16HI 
[(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_AVX2" + "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);") + (define_expand "sse2_uavgv8hi3" [(set (match_operand:V8HI 0 "register_operand" "") (truncate:V8HI @@ -6734,6 +7569,31 @@ "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);") +(define_insn "*avx2_uavgv16hi3" + [(set (match_operand:V16HI 0 "register_operand" "=x") + (truncate:V16HI + (lshiftrt:V16SI + (plus:V16SI + (plus:V16SI + (zero_extend:V16SI + (match_operand:V16HI 1 "nonimmediate_operand" "%x")) + (zero_extend:V16SI + (match_operand:V16HI 2 "nonimmediate_operand" "xm"))) + (const_vector:V16HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)" + "vpavgw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "*sse2_uavgv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x,x") (truncate:V8HI @@ -6761,11 +7621,11 @@ ;; The correct representation for this is absolutely enormous, and ;; surely not generally useful. 
-(define_insn "sse2_psadbw" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") - (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0,x") - (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")] - UNSPEC_PSADBW))] +(define_insn "<sse2_avx2>_psadbw" + [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x") + (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x") + (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")] + UNSPEC_PSADBW))] "TARGET_SSE2" "@ psadbw\t{%2, %0|%0, %2} @@ -6775,7 +7635,7 @@ (set_attr "atom_unit" "simul") (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>" [(set (match_operand:SI 0 "register_operand" "=r") @@ -6788,6 +7648,16 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>")]) +(define_insn "avx2_pmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")] + UNSPEC_MOVMSK))] + "TARGET_AVX2" + "vpmovmskb\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "vex") + (set_attr "mode" "DI")]) + (define_insn "sse2_pmovmskb" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")] @@ -6947,6 +7817,82 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_insn "avx2_phaddwv16hi3" + [(set (match_operand:V16HI 0 "register_operand" "=x") + (vec_concat:V16HI + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V16HI 1 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + 
(vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 8)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 9)]))) + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 10)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 11)])))) + (vec_concat:V2HI + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 12)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 13)]))) + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 14)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))) + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V16HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 8)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 9)]))) + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 10)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 11)])))) + (vec_concat:V2HI + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 12)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 13)]))) + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 14)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 
15)]))))))))] + "TARGET_AVX2" + "vphaddw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "ssse3_phaddwv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_concat:V8HI @@ -7025,6 +7971,50 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) +(define_insn "avx2_phadddv8si3" + [(set (match_operand:V8SI 0 "register_operand" "=x") + (vec_concat:V8SI + (vec_concat:V4SI + (vec_concat:V2SI + (plus:SI + (vec_select:SI + (match_operand:V8SI 1 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (plus:SI + (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2SI + (plus:SI + (vec_select:SI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 5)]))) + (plus:SI + (vec_select:SI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4SI + (vec_concat:V2SI + (plus:SI + (vec_select:SI + (match_operand:V8SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) + (plus:SI + (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2SI + (plus:SI + (vec_select:SI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 5)]))) + (plus:SI + (vec_select:SI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_AVX2" + "vphaddd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "ssse3_phadddv4si3" [(set (match_operand:V4SI 0 
"register_operand" "=x,x") (vec_concat:V4SI @@ -7079,6 +8069,82 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) +(define_insn "avx2_phaddswv16hi3" + [(set (match_operand:V16HI 0 "register_operand" "=x") + (vec_concat:V16HI + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V16HI 1 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 8)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 9)]))) + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 10)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 11)])))) + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 12)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 13)]))) + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 14)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))) + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V16HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + 
(vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 8)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 9)]))) + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 10)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 11)])))) + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 12)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 13)]))) + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 14)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))] + "TARGET_AVX2" + "vphaddsw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "ssse3_phaddswv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_concat:V8HI @@ -7157,6 +8223,82 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) +(define_insn "avx2_phsubwv16hi3" + [(set (match_operand:V16HI 0 "register_operand" "=x") + (vec_concat:V16HI + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V16HI 1 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI 
+ (vec_select:HI (match_dup 1) (parallel [(const_int 8)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 9)]))) + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 10)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 11)])))) + (vec_concat:V2HI + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 12)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 13)]))) + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 14)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))) + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V16HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 8)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 9)]))) + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 10)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 11)])))) + (vec_concat:V2HI + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 12)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 13)]))) + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 14)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))] + "TARGET_AVX2" + "vphsubw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "ssse3_phsubwv8hi3" [(set 
(match_operand:V8HI 0 "register_operand" "=x,x") (vec_concat:V8HI @@ -7235,6 +8377,50 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) +(define_insn "avx2_phsubdv8si3" + [(set (match_operand:V8SI 0 "register_operand" "=x") + (vec_concat:V8SI + (vec_concat:V4SI + (vec_concat:V2SI + (minus:SI + (vec_select:SI + (match_operand:V8SI 1 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (minus:SI + (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2SI + (minus:SI + (vec_select:SI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 5)]))) + (minus:SI + (vec_select:SI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4SI + (vec_concat:V2SI + (minus:SI + (vec_select:SI + (match_operand:V8SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) + (minus:SI + (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2SI + (minus:SI + (vec_select:SI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 5)]))) + (minus:SI + (vec_select:SI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_AVX2" + "vphsubd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "ssse3_phsubdv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x,x") (vec_concat:V4SI @@ -7290,6 +8476,82 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) +(define_insn "avx2_phsubswv16hi3" + [(set 
(match_operand:V16HI 0 "register_operand" "=x") + (vec_concat:V16HI + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V16HI 1 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 8)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 9)]))) + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 10)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 11)])))) + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 12)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 13)]))) + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 14)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))) + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V16HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))) + (vec_concat:V4HI 
+ (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 8)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 9)]))) + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 10)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 11)])))) + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 12)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 13)]))) + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 14)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))] + "TARGET_AVX2" + "vphsubsw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "ssse3_phsubswv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_concat:V8HI @@ -7368,6 +8630,92 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) +(define_insn "avx2_pmaddubsw256" + [(set (match_operand:V16HI 0 "register_operand" "=x") + (ss_plus:V16HI + (mult:V16HI + (zero_extend:V16HI + (vec_select:V16QI + (match_operand:V32QI 1 "register_operand" "x") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14) + (const_int 16) + (const_int 18) + (const_int 20) + (const_int 22) + (const_int 24) + (const_int 26) + (const_int 28) + (const_int 30)]))) + (sign_extend:V16HI + (vec_select:V16QI + (match_operand:V32QI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14) + (const_int 16) + (const_int 18) + (const_int 20) + (const_int 22) + (const_int 24) + (const_int 26) + (const_int 28) + (const_int 30)])))) + (mult:V16HI + (zero_extend:V16HI + (vec_select:V16QI (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + 
(const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15) + (const_int 17) + (const_int 19) + (const_int 21) + (const_int 23) + (const_int 25) + (const_int 27) + (const_int 29) + (const_int 31)]))) + (sign_extend:V16HI + (vec_select:V16QI (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15) + (const_int 17) + (const_int 19) + (const_int 21) + (const_int 23) + (const_int 25) + (const_int 27) + (const_int 29) + (const_int 31)]))))))] + "TARGET_AVX2" + "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "ssse3_pmaddubsw128" [(set (match_operand:V8HI 0 "register_operand" "=x,x") (ss_plus:V8HI @@ -7466,6 +8814,58 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) +(define_expand "avx2_umulhrswv16hi3" + [(set (match_operand:V16HI 0 "register_operand" "") + (truncate:V16HI + (lshiftrt:V16SI + (plus:V16SI + (lshiftrt:V16SI + (mult:V16SI + (sign_extend:V16SI + (match_operand:V16HI 1 "nonimmediate_operand" "")) + (sign_extend:V16SI + (match_operand:V16HI 2 "nonimmediate_operand" ""))) + (const_int 14)) + (const_vector:V16HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_AVX2" + "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);") + +(define_insn "*avx2_umulhrswv16hi3" + [(set (match_operand:V16HI 0 "register_operand" "=x") + (truncate:V16HI + (lshiftrt:V16SI + (plus:V16SI + (lshiftrt:V16SI + (mult:V16SI + (sign_extend:V16SI + (match_operand:V16HI 1 "nonimmediate_operand" "%x")) + (sign_extend:V16SI + (match_operand:V16HI 2 "nonimmediate_operand" 
"xm"))) + (const_int 14)) + (const_vector:V16HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)" + "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseimul") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_expand "ssse3_pmulhrswv8hi3" [(set (match_operand:V8HI 0 "register_operand" "") (truncate:V8HI @@ -7554,11 +8954,11 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "ssse3_pshufbv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x,x") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x") - (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")] - UNSPEC_PSHUFB))] +(define_insn "<ssse3_avx2>_pshufb<mode>3" + [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") + (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x") + (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")] + UNSPEC_PSHUFB))] "TARGET_SSSE3" "@ pshufb\t{%2, %0|%0, %2} @@ -7568,7 +8968,7 @@ (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) (define_insn "ssse3_pshufbv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -7582,11 +8982,11 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "ssse3_psign<mode>3" - [(set (match_operand:VI124_128 0 "register_operand" "=x,x") - (unspec:VI124_128 - [(match_operand:VI124_128 1 "register_operand" "0,x") - (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")] +(define_insn "<ssse3_avx2>_psign<mode>3" + [(set 
(match_operand:VI124_AVX2 0 "register_operand" "=x,x") + (unspec:VI124_AVX2 + [(match_operand:VI124_AVX2 1 "register_operand" "0,x") + (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")] UNSPEC_PSIGN))] "TARGET_SSSE3" "@ @@ -7597,7 +8997,7 @@ (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) (define_insn "ssse3_psign<mode>3" [(set (match_operand:MMXMODEI 0 "register_operand" "=y") @@ -7612,12 +9012,12 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "ssse3_palignrti" - [(set (match_operand:TI 0 "register_operand" "=x,x") - (unspec:TI [(match_operand:TI 1 "register_operand" "0,x") - (match_operand:TI 2 "nonimmediate_operand" "xm,xm") - (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")] - UNSPEC_PALIGNR))] +(define_insn "<ssse3_avx2>_palignr<mode>" + [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x") + (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x") + (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")] + UNSPEC_PALIGNR))] "TARGET_SSSE3" { operands[3] = GEN_INT (INTVAL (operands[3]) / 8); @@ -7639,7 +9039,7 @@ (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) (define_insn "ssse3_palignrdi" [(set (match_operand:DI 0 "register_operand" "=y") @@ -7660,16 +9060,16 @@ (set_attr "mode" "DI")]) (define_insn "abs<mode>2" - [(set (match_operand:VI124_128 0 "register_operand" "=x") - (abs:VI124_128 - (match_operand:VI124_128 1 "nonimmediate_operand" "xm")))] + [(set (match_operand:VI124_AVX2 0 "register_operand" "=x") + (abs:VI124_AVX2 + (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))] "TARGET_SSSE3" "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}" [(set_attr 
"type" "sselog1") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) (define_insn "abs<mode>2" [(set (match_operand:MMXMODEI 0 "register_operand" "=y") @@ -7693,7 +9093,7 @@ [(set (match_operand:MODEF 0 "memory_operand" "=m") (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x")] - UNSPEC_MOVNT))] + UNSPEC_MOVNT))] "TARGET_SSE4A" "movnt<ssemodesuffix>\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") @@ -7713,10 +9113,10 @@ (define_insn "sse4a_extrqi" [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") - (match_operand 2 "const_0_to_255_operand" "") - (match_operand 3 "const_0_to_255_operand" "")] - UNSPEC_EXTRQI))] + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand 2 "const_0_to_255_operand" "") + (match_operand 3 "const_0_to_255_operand" "")] + UNSPEC_EXTRQI))] "TARGET_SSE4A" "extrq\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "sse") @@ -7726,9 +9126,9 @@ (define_insn "sse4a_extrq" [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") - (match_operand:V16QI 2 "register_operand" "x")] - UNSPEC_EXTRQ))] + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V16QI 2 "register_operand" "x")] + UNSPEC_EXTRQ))] "TARGET_SSE4A" "extrq\t{%2, %0|%0, %2}" [(set_attr "type" "sse") @@ -7737,11 +9137,11 @@ (define_insn "sse4a_insertqi" [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") - (match_operand:V2DI 2 "register_operand" "x") - (match_operand 3 "const_0_to_255_operand" "") - (match_operand 4 "const_0_to_255_operand" "")] - UNSPEC_INSERTQI))] + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "register_operand" "x") + (match_operand 3 "const_0_to_255_operand" "") + (match_operand 4 
"const_0_to_255_operand" "")] + UNSPEC_INSERTQI))] "TARGET_SSE4A" "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}" [(set_attr "type" "sseins") @@ -7752,9 +9152,9 @@ (define_insn "sse4a_insertq" [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") - (match_operand:V2DI 2 "register_operand" "x")] - UNSPEC_INSERTQ))] + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "register_operand" "x")] + UNSPEC_INSERTQ))] "TARGET_SSE4A" "insertq\t{%2, %0|%0, %2}" [(set_attr "type" "sseins") @@ -7824,23 +9224,23 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "<MODE>")]) -(define_insn "sse4_1_movntdqa" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")] +(define_insn "<sse4_1_avx2>_movntdqa" + [(set (match_operand:VI8_AVX2 0 "register_operand" "=x") + (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")] UNSPEC_MOVNTDQA))] "TARGET_SSE4_1" "%vmovntdqa\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) -(define_insn "sse4_1_mpsadbw" - [(set (match_operand:V16QI 0 "register_operand" "=x,x") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x") - (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm") - (match_operand:SI 3 "const_0_to_255_operand" "n,n")] - UNSPEC_MPSADBW))] +(define_insn "<sse4_1_avx2>_mpsadbw" + [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") + (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x") + (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm") + (match_operand:SI 3 "const_0_to_255_operand" "n,n")] + UNSPEC_MPSADBW))] "TARGET_SSE4_1" "@ mpsadbw\t{%3, %2, %0|%0, %2, %3} @@ -7850,7 +9250,21 @@ (set_attr "length_immediate" "1") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr 
"mode" "<sseinsnmode>")]) + +(define_insn "avx2_packusdw" + [(set (match_operand:V16HI 0 "register_operand" "=x") + (vec_concat:V16HI + (us_truncate:V8HI + (match_operand:V8SI 1 "register_operand" "x")) + (us_truncate:V8HI + (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))] + "TARGET_AVX2" + "vpackusdw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) (define_insn "sse4_1_packusdw" [(set (match_operand:V8HI 0 "register_operand" "=x,x") @@ -7869,12 +9283,12 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "sse4_1_pblendvb" - [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x,x") - (unspec:V16QI - [(match_operand:V16QI 1 "reg_not_xmm0_operand_maybe_avx" "0,x") - (match_operand:V16QI 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm") - (match_operand:V16QI 3 "register_operand" "Yz,x")] +(define_insn "<sse4_1_avx2>_pblendvb" + [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand" "=x,x") + (unspec:VI1_AVX2 + [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x") + (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm") + (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")] UNSPEC_BLENDV))] "TARGET_SSE4_1" "@ @@ -7885,13 +9299,13 @@ (set_attr "prefix_extra" "1") (set_attr "length_immediate" "*,1") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) -(define_insn "sse4_1_pblendw" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (vec_merge:V8HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") - (match_operand:V8HI 1 "register_operand" "0,x") +(define_insn "<sse4_1_avx2>_pblendw" + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") + (vec_merge:VI2_AVX2 + (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm") + (match_operand:VI2_AVX2 1 "register_operand" "0,x") (match_operand:SI 3 "const_0_to_255_operand" "n,n")))] "TARGET_SSE4_1" "@ @@ -7902,7 
+9316,21 @@ (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx2_pblendd<mode>" + [(set (match_operand:VI4_AVX2 0 "register_operand" "=x") + (vec_merge:VI4_AVX2 + (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm") + (match_operand:VI4_AVX2 1 "register_operand" "x") + (match_operand:SI 3 "const_0_to_255_operand" "n")))] + "TARGET_AVX2" + "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "<sseinsnmode>")]) (define_insn "sse4_1_phminposuw" [(set (match_operand:V8HI 0 "register_operand" "=x") @@ -7915,6 +9343,17 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) +(define_insn "avx2_<code>v16qiv16hi2" + [(set (match_operand:V16HI 0 "register_operand" "=x") + (any_extend:V16HI + (match_operand:V16QI 1 "nonimmediate_operand" "xm")))] + "TARGET_AVX2" + "vpmov<extsuffix>bw\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "sse4_1_<code>v8qiv8hi2" [(set (match_operand:V8HI 0 "register_operand" "=x") (any_extend:V8HI @@ -7935,6 +9374,26 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) +(define_insn "avx2_<code>v8qiv8si2" + [(set (match_operand:V8SI 0 "register_operand" "=x") + (any_extend:V8SI + (vec_select:V8QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3) + (const_int 4) + (const_int 5) + (const_int 6) + (const_int 7)]))))] + "TARGET_AVX2" + "vpmov<extsuffix>bd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "sse4_1_<code>v4qiv4si2" [(set (match_operand:V4SI 0 "register_operand" "=x") (any_extend:V4SI @@ 
-7951,6 +9410,17 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) +(define_insn "avx2_<code>v8hiv8si2" + [(set (match_operand:V8SI 0 "register_operand" "=x") + (any_extend:V8SI + (match_operand:V8HI 1 "nonimmediate_operand" "xm")))] + "TARGET_AVX2" + "vpmov<extsuffix>wd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "sse4_1_<code>v4hiv4si2" [(set (match_operand:V4SI 0 "register_operand" "=x") (any_extend:V4SI @@ -7967,6 +9437,22 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) +(define_insn "avx2_<code>v4qiv4di2" + [(set (match_operand:V4DI 0 "register_operand" "=x") + (any_extend:V4DI + (vec_select:V4QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_AVX2" + "vpmov<extsuffix>bq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "sse4_1_<code>v2qiv2di2" [(set (match_operand:V2DI 0 "register_operand" "=x") (any_extend:V2DI @@ -7981,6 +9467,22 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) +(define_insn "avx2_<code>v4hiv4di2" + [(set (match_operand:V4DI 0 "register_operand" "=x") + (any_extend:V4DI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_AVX2" + "vpmov<extsuffix>wq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "sse4_1_<code>v2hiv2di2" [(set (match_operand:V2DI 0 "register_operand" "=x") (any_extend:V2DI @@ -7995,6 +9497,16 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) +(define_insn "avx2_<code>v4siv4di2" + [(set (match_operand:V4DI 0 "register_operand" "=x") + (any_extend:V4DI + (match_operand:V4SI 1 
"nonimmediate_operand" "xm")))] + "TARGET_AVX2" + "vpmov<extsuffix>dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "OI")]) + (define_insn "sse4_1_<code>v2siv2di2" [(set (match_operand:V2DI 0 "register_operand" "=x") (any_extend:V2DI @@ -8360,7 +9872,7 @@ ;; do not allow the value being added to be a memory operation. (define_insn "xop_pmacsww" [(set (match_operand:V8HI 0 "register_operand" "=x") - (plus:V8HI + (plus:V8HI (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x") (match_operand:V8HI 2 "nonimmediate_operand" "xm")) @@ -8372,7 +9884,7 @@ (define_insn "xop_pmacssww" [(set (match_operand:V8HI 0 "register_operand" "=x") - (ss_plus:V8HI + (ss_plus:V8HI (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x") (match_operand:V8HI 2 "nonimmediate_operand" "xm")) (match_operand:V8HI 3 "nonimmediate_operand" "x")))] @@ -8383,7 +9895,7 @@ (define_insn "xop_pmacsdd" [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI + (plus:V4SI (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") (match_operand:V4SI 2 "nonimmediate_operand" "xm")) @@ -8395,7 +9907,7 @@ (define_insn "xop_pmacssdd" [(set (match_operand:V4SI 0 "register_operand" "=x") - (ss_plus:V4SI + (ss_plus:V4SI (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") (match_operand:V4SI 2 "nonimmediate_operand" "xm")) (match_operand:V4SI 3 "nonimmediate_operand" "x")))] @@ -9218,7 +10730,7 @@ int i; if (GET_MODE (op2) != <ssescalarmode>mode) - { + { op2 = gen_reg_rtx (<ssescalarmode>mode); convert_move (op2, operands[2], false); } @@ -9250,7 +10762,7 @@ int i; if (GET_MODE (op2) != <ssescalarmode>mode) - { + { op2 = gen_reg_rtx (<ssescalarmode>mode); convert_move (op2, operands[2], false); } @@ -9772,6 +11284,99 @@ (set_attr "prefix" "vex") (set_attr "mode" "OI")]) +(define_mode_attr AVXTOSSEMODE + [(V4DI "V2DI") (V2DI "V2DI") + (V8SI "V4SI") (V4SI "V4SI") + (V16HI "V8HI") (V8HI "V8HI") + (V32QI "V16QI") (V16QI 
"V16QI")]) + +(define_insn "avx2_pbroadcast<mode>" + [(set (match_operand:VI 0 "register_operand" "=x") + (vec_duplicate:VI + (vec_select:<ssescalarmode> + (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_AVX2" + "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx2_permvarv8si" + [(set (match_operand:V8SI 0 "register_operand" "=x") + (unspec:V8SI + [(match_operand:V8SI 1 "register_operand" "x") + (match_operand:V8SI 2 "nonimmediate_operand" "xm")] + UNSPEC_VPERMSI))] + "TARGET_AVX2" + "vpermd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + +(define_insn "avx2_permv4df" + [(set (match_operand:V4DF 0 "register_operand" "=x") + (unspec:V4DF + [(match_operand:V4DF 1 "register_operand" "xm") + (match_operand:SI 2 "const_0_to_255_operand" "n")] + UNSPEC_VPERMDF))] + "TARGET_AVX2" + "vpermpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + +(define_insn "avx2_permvarv8sf" + [(set (match_operand:V8SF 0 "register_operand" "=x") + (unspec:V8SF + [(match_operand:V8SF 1 "register_operand" "x") + (match_operand:V8SF 2 "nonimmediate_operand" "xm")] + UNSPEC_VPERMSF))] + "TARGET_AVX2" + "vpermps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + +(define_insn "avx2_permv4di" + [(set (match_operand:V4DI 0 "register_operand" "=x") + (unspec:V4DI + [(match_operand:V4DI 1 "register_operand" "xm") + (match_operand:SI 2 "const_0_to_255_operand" "n")] + UNSPEC_VPERMDI))] + "TARGET_AVX2" + "vpermq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + +(define_insn "avx2_permv2ti" + [(set (match_operand:V4DI 0 
"register_operand" "=x") + (unspec:V4DI + [(match_operand:V4DI 1 "register_operand" "x") + (match_operand:V4DI 2 "register_operand" "xm") + (match_operand:SI 3 "const_0_to_255_operand" "n")] + UNSPEC_VPERMTI))] + "TARGET_AVX2" + "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sselog") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + +(define_insn "avx2_vec_dupv4df" + [(set (match_operand:V4DF 0 "register_operand" "=x") + (vec_duplicate:V4DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] + "TARGET_AVX2" + "vbroadcastsd\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog1") + (set_attr "prefix" "vex") + (set_attr "mode" "V4DF")]) + ;; Modes handled by AVX vec_dup patterns. (define_mode_iterator AVX_VEC_DUP_MODE [V8SI V8SF V4DI V4DF]) @@ -9789,6 +11394,18 @@ (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) +(define_insn "avx2_vbroadcasti128_<mode>" + [(set (match_operand:VI_256 0 "register_operand" "=x") + (vec_concat:VI_256 + (match_operand:<ssehalfvecmode> 1 "memory_operand" "m") + (match_dup 1)))] + "TARGET_AVX2" + "vbroadcasti128\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_split [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "") (vec_duplicate:AVX_VEC_DUP_MODE @@ -9880,7 +11497,7 @@ } operands[1] = adjust_address_nv (op1, <ssescalarmode>mode, - elt * GET_MODE_SIZE (<ssescalarmode>mode)); + elt * GET_MODE_SIZE (<ssescalarmode>mode)); }) (define_expand "avx_vpermil<mode>" @@ -10061,6 +11678,36 @@ DONE; }) +(define_insn "avx2_vec_set_lo_v4di" + [(set (match_operand:V4DI 0 "register_operand" "=x") + (vec_concat:V4DI + (match_operand:V2DI 2 "nonimmediate_operand" "xm") + (vec_select:V2DI + (match_operand:V4DI 1 "register_operand" "x") + (parallel [(const_int 2) (const_int 3)]))))] + "TARGET_AVX2" + "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" + [(set_attr "type" "sselog") + 
(set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + +(define_insn "avx2_vec_set_hi_v4di" + [(set (match_operand:V4DI 0 "register_operand" "=x") + (vec_concat:V4DI + (vec_select:V2DI + (match_operand:V4DI 1 "register_operand" "x") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_AVX2" + "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + (define_insn "vec_set_lo_<mode>" [(set (match_operand:VI8F_256 0 "register_operand" "=x") (vec_concat:VI8F_256 @@ -10203,24 +11850,39 @@ (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) -(define_expand "avx_maskload<ssemodesuffix><avxsizesuffix>" - [(set (match_operand:VF 0 "register_operand" "") - (unspec:VF +(define_expand "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>" + [(set (match_operand:V48_AVX2 0 "register_operand" "") + (unspec:V48_AVX2 [(match_operand:<sseintvecmode> 2 "register_operand" "") - (match_operand:VF 1 "memory_operand" "") + (match_operand:V48_AVX2 1 "memory_operand" "") (match_dup 0)] UNSPEC_MASKMOV))] "TARGET_AVX") -(define_expand "avx_maskstore<ssemodesuffix><avxsizesuffix>" - [(set (match_operand:VF 0 "memory_operand" "") - (unspec:VF +(define_expand "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>" + [(set (match_operand:V48_AVX2 0 "memory_operand" "") + (unspec:V48_AVX2 [(match_operand:<sseintvecmode> 1 "register_operand" "") - (match_operand:VF 2 "register_operand" "") + (match_operand:V48_AVX2 2 "register_operand" "") (match_dup 0)] UNSPEC_MASKMOV))] "TARGET_AVX") +(define_insn "*avx2_maskmov<ssemodesuffix><avxsizesuffix>" + [(set (match_operand:VI48_AVX2 0 "nonimmediate_operand" "=x,m") + (unspec:VI48_AVX2 + [(match_operand:<sseintvecmode> 1 "register_operand" "x,x") + (match_operand:VI48_AVX2 2 
"nonimmediate_operand" "m,x")
+           (match_dup 0)]
+          UNSPEC_MASKMOV))]
+  "TARGET_AVX2
+   && (REG_P (operands[0]) == MEM_P (operands[2]))"
+  "vpmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "*avx_maskmov<ssemodesuffix><avxsizesuffix>"
   [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
         (unspec:VF
@@ -10265,6 +11927,286 @@
   DONE;
 })
 
+(define_insn "avx2_extracti128"  ;; vextracti128: V2DI result taken from a V4DI source, selector in operand 2
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (vec_select:V2DI
+          (match_operand:V4DI 1 "nonimmediate_operand" "xm")
+          (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]  ;; NOTE(review): one-index parallel with a V2DI result -- confirm this vec_select shape is intended
+  "TARGET_AVX2"
+  "vextracti128\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
+
+(define_expand "avx2_inserti128"  ;; dispatches on operand 3 (0 or 1) to the lo/hi V4DI vec_set generator
+  [(match_operand:V4DI 0 "register_operand" "")
+   (match_operand:V4DI 1 "register_operand" "")
+   (match_operand:V2DI 2 "nonimmediate_operand" "")
+   (match_operand:SI 3 "const_0_to_1_operand" "")]
+  "TARGET_AVX2"
+{
+  rtx (*insn)(rtx, rtx, rtx);
+
+  switch (INTVAL (operands[3]))
+    {
+    case 0:
+      insn = gen_avx2_vec_set_lo_v4di;
+      break;
+    case 1:
+      insn = gen_avx2_vec_set_hi_v4di;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_insn (insn (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "avx2_ashrvv8si"  ;; vpsravd, V8SI: element i of op1 arithmetically shifted right by element i of op2
+  [(set (match_operand:V8SI 0 "register_operand" "=x")
+        (vec_concat:V8SI
+          (vec_concat:V4SI  ;; result elements 0..3
+            (vec_concat:V2SI
+              (ashiftrt:SI
+                (vec_select:SI
+                  (match_operand:V8SI 1 "register_operand" "x")
+                  (parallel [(const_int 0)]))
+                (vec_select:SI
+                  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
+                  (parallel [(const_int 0)])))
+              (ashiftrt:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 1)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 1)]))))
+            (vec_concat:V2SI
+              (ashiftrt:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 2)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 2)])))
+              (ashiftrt:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 3)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 3)])))))
+          (vec_concat:V4SI  ;; result elements 4..7 (must not re-select 0..3)
+            (vec_concat:V2SI
+              (ashiftrt:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 4)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 4)])))
+              (ashiftrt:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 5)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 5)]))))
+            (vec_concat:V2SI
+              (ashiftrt:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 6)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 6)])))
+              (ashiftrt:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 7)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 7)])))))))]
+  "TARGET_AVX2"
+  "vpsravd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
+
+(define_insn "avx2_ashrvv4si"  ;; vpsravd, V4SI: same per-element arithmetic right shift over four elements
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (vec_concat:V4SI
+          (vec_concat:V2SI
+            (ashiftrt:SI
+              (vec_select:SI
+                (match_operand:V4SI 1 "register_operand" "x")
+                (parallel [(const_int 0)]))
+              (vec_select:SI
+                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+                (parallel [(const_int 0)])))
+            (ashiftrt:SI
+              (vec_select:SI
+                (match_dup 1)
+                (parallel [(const_int 1)]))
+              (vec_select:SI
+                (match_dup 2)
+                (parallel [(const_int 1)]))))
+          (vec_concat:V2SI
+            (ashiftrt:SI
+              (vec_select:SI
+                (match_dup 1)
+                (parallel [(const_int 2)]))
+              (vec_select:SI
+                (match_dup 2)
+                (parallel [(const_int 2)])))
+            (ashiftrt:SI
+              (vec_select:SI
+                (match_dup 1)
+                (parallel [(const_int 3)]))
+              (vec_select:SI
+                (match_dup 2)
+                (parallel [(const_int 3)]))))))]
+  "TARGET_AVX2"
+  "vpsravd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "TI")])
+
+(define_insn "avx2_<lshift>vv8si"  ;; vp<lshift_insn>vd, V8SI: element i of op1 shifted (<lshift>) by element i of op2
+  [(set (match_operand:V8SI 0 "register_operand" "=x")
+        (vec_concat:V8SI
+          (vec_concat:V4SI  ;; result elements 0..3
+            (vec_concat:V2SI
+              (lshift:SI
+                (vec_select:SI
+                  (match_operand:V8SI 1 "register_operand" "x")
+                  (parallel [(const_int 0)]))
+                (vec_select:SI
+                  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
+                  (parallel [(const_int 0)])))
+              (lshift:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 1)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 1)]))))
+            (vec_concat:V2SI
+              (lshift:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 2)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 2)])))
+              (lshift:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 3)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 3)])))))
+          (vec_concat:V4SI  ;; result elements 4..7 (must not re-select 0..3)
+            (vec_concat:V2SI
+              (lshift:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 4)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 4)])))
+              (lshift:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 5)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 5)]))))
+            (vec_concat:V2SI
+              (lshift:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 6)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 6)])))
+              (lshift:SI
+                (vec_select:SI
+                  (match_dup 1)
+                  (parallel [(const_int 7)]))
+                (vec_select:SI
+                  (match_dup 2)
+                  (parallel [(const_int 7)])))))))]
+  "TARGET_AVX2"
+  "vp<lshift_insn>vd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
+
+(define_insn "avx2_<lshift>v<mode>"  ;; four-element per-element <lshift> over the VI4SD_AVX2 modes
+  [(set (match_operand:VI4SD_AVX2 0 "register_operand" "=x")
+        (vec_concat:VI4SD_AVX2
+          (vec_concat:<ssehalfvecmode>
+            (lshift:<ssescalarmode>
+              (vec_select:<ssescalarmode>
+                (match_operand:VI4SD_AVX2 1 "register_operand" "x")
+                (parallel [(const_int 0)]))
+              (vec_select:<ssescalarmode>
+                (match_operand:VI4SD_AVX2 2 "nonimmediate_operand" "xm")
+                (parallel [(const_int 0)])))
+            (lshift:<ssescalarmode>
+              (vec_select:<ssescalarmode>
+                (match_dup 1)
+                (parallel [(const_int 1)]))
+              (vec_select:<ssescalarmode>
+                (match_dup 2)
+                (parallel [(const_int 1)]))))
+          (vec_concat:<ssehalfvecmode>
+            (lshift:<ssescalarmode>
+              (vec_select:<ssescalarmode>
+                (match_dup 1)
+                (parallel [(const_int 2)]))
+              (vec_select:<ssescalarmode>
+                (match_dup 2)
+                (parallel [(const_int 2)])))
+            (lshift:<ssescalarmode>
+              (vec_select:<ssescalarmode>
+                (match_dup 1)
+                (parallel [(const_int 3)]))
+              (vec_select:<ssescalarmode>
+                (match_dup 2)
+                (parallel [(const_int 3)]))))))]
+  "TARGET_AVX2"
+  "vp<lshift_insn>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx2_<lshift>vv2di"  ;; vp<lshift_insn>vq, V2DI: two-element per-element <lshift>
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (vec_concat:V2DI
+          (lshift:DI
+            (vec_select:DI
+              (match_operand:V2DI 1 "register_operand" "x")
+              (parallel [(const_int 0)]))
+            (vec_select:DI
+              (match_operand:V2DI 2 "nonimmediate_operand" "xm")
+              (parallel [(const_int 0)])))
+          (lshift:DI
+            (vec_select:DI
+              (match_dup 1)
+              (parallel [(const_int 1)]))
+            (vec_select:DI
+              (match_dup 2)
+              (parallel [(const_int 1)])))))]
+  "TARGET_AVX2"
+  "vp<lshift_insn>vq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "TI")])
+
 (define_insn "*vec_concat<mode>_avx"
   [(set (match_operand:V_256 0 "register_operand" "=x,x")
         (vec_concat:V_256
@@ -10278,7 +12220,7 @@
       return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
     case 1:
       switch (get_attr_mode (insn))
-        {
+	{
         case MODE_V8SF:
           return "vmovaps\t{%1, %x0|%x0, %1}";
         case MODE_V4DF:
@@ -10373,3 +12315,95 @@
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
+
+;; For gather* insn patterns
+(define_mode_iterator VEC_GATHER_MODE
+                      [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
+(define_mode_attr VEC_GATHER_MODE  ;; maps each gathered mode to the mode used for operand 3 of the gathersi patterns
+                      [(V2DI "V4SI") (V2DF "V4SI")
+                       (V4DI "V4SI") (V4DF "V4SI")
+                       (V4SI "V4SI") (V4SF "V4SI")
+                       (V8SI "V8SI") (V8SF "V8SI")])
+
+(define_expand "avx2_gathersi<mode>"  ;; named entry point; operand 2 is a memory_operand here
+  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
+        (unspec:VEC_GATHER_MODE
+          [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
+           (match_operand:<ssescalarmode> 2 "memory_operand" "")
+           (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
+           (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
+           (match_operand:SI 5 "const1248_operand" "")]
+          UNSPEC_GATHER))]
+  "TARGET_AVX2")
+
+(define_insn "*avx2_gathersi<mode>"  ;; matching insn: operand 1 is tied to the destination ("0"), operand 5 is printed via %c5
+  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=x")
+        (unspec:VEC_GATHER_MODE
+          [(match_operand:VEC_GATHER_MODE 1 "register_operand" "0")
+           (mem:<ssescalarmode>
+             (match_operand:P 2 "register_operand" "r"))
+           (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "x")
+           (match_operand:VEC_GATHER_MODE 4 "register_operand" "x")
+           (match_operand:SI 5 "const1248_operand" "n")]
+          UNSPEC_GATHER))]
+  "TARGET_AVX2"
+  "v<gthrfirstp>gatherd<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx2_gatherdi<mode>"  ;; named entry point for the gatherq forms; operand 3 uses <AVXMODE48P_DI>
+  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
+        (unspec:VEC_GATHER_MODE
+          [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
+           (match_operand:<ssescalarmode> 2 "memory_operand" "")
+           (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
+           (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
+           (match_operand:SI 5 "const1248_operand" "")]
+          UNSPEC_GATHER))]
+  "TARGET_AVX2")
+
+(define_insn "*avx2_gatherdi<mode>"  ;; matching insn: operand 1 is tied to the destination ("0")
+  [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=x")
+        (unspec:AVXMODE48P_DI
+          [(match_operand:AVXMODE48P_DI 1 "register_operand" "0")
+           (mem:<ssescalarmode>
+             (match_operand:P 2 "register_operand" "r"))
+           (match_operand:<AVXMODE48P_DI> 3 "register_operand" "x")
+           (match_operand:AVXMODE48P_DI 4 "register_operand" "x")
+           (match_operand:SI 5 "const1248_operand" "n")]
+          UNSPEC_GATHER))]
+  "TARGET_AVX2"
+  "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+;; Special handling for VEX.256 with float arguments,
+;; since there are still xmm registers as operands
+(define_expand "avx2_gatherdi<mode>256"  ;; VI4F_128 data operands combined with a V4DI operand 3
+  [(set (match_operand:VI4F_128 0 "register_operand" "")
+        (unspec:VI4F_128
+          [(match_operand:VI4F_128 1 "register_operand" "")
+           (match_operand:<ssescalarmode> 2 "memory_operand" "")
+           (match_operand:V4DI 3 "register_operand" "")
+           (match_operand:VI4F_128 4 "register_operand" "")
+           (match_operand:SI 5 "const1248_operand" "")]
+          UNSPEC_GATHER))]
+  "TARGET_AVX2")
+
+(define_insn "*avx2_gatherdi<mode>256"  ;; matching insn: operand 1 is tied to the destination ("0")
+  [(set (match_operand:VI4F_128 0 "register_operand" "=x")
+        (unspec:VI4F_128
+          [(match_operand:VI4F_128 1 "register_operand" "0")
+           (mem:<ssescalarmode>
+             (match_operand:P 2 "register_operand" "r"))
+           (match_operand:V4DI 3 "register_operand" "x")
+           (match_operand:VI4F_128 4 "register_operand" "x")
+           (match_operand:SI 5 "const1248_operand" "n")]
+          UNSPEC_GATHER))]
+  "TARGET_AVX2"
+  "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<sseinsnmode>")]) |