aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386/sse.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386/sse.md')
-rw-r--r--gcc/config/i386/sse.md475
1 files changed, 473 insertions, 2 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 78976ed..9985b7d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2620,7 +2620,20 @@
[(set_attr "type" "sseimul")
(set_attr "mode" "TI")])
-(define_insn "sse2_smulv8hi3_highpart"
+;; Expander for the signed V8HI high-part multiply: the upper 16 bits of
+;; each sign-extended 16x16 product.  This must be a define_expand, not a
+;; define_insn: its operands carry no constraints and the trailing string
+;; is a C preparation statement, not an assembler template.  The
+;; "*smulv8hi3_highpart" define_insn that follows is the pattern that is
+;; expected to match the RTL generated here.
+(define_expand "smulv8hi3_highpart"
+  [(set (match_operand:V8HI 0 "register_operand" "")
+	(truncate:V8HI
+	  (lshiftrt:V8SI
+	    (mult:V8SI
+	      (sign_extend:V8SI
+		(match_operand:V8HI 1 "nonimmediate_operand" ""))
+	      (sign_extend:V8SI
+		(match_operand:V8HI 2 "nonimmediate_operand" "")))
+	    (const_int 16))))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*smulv8hi3_highpart"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(truncate:V8HI
(lshiftrt:V8SI
@@ -2635,7 +2648,20 @@
[(set_attr "type" "sseimul")
(set_attr "mode" "TI")])
-(define_insn "sse2_umulv8hi3_highpart"
+;; Expander for the unsigned V8HI high-part multiply: the upper 16 bits of
+;; each zero-extended 16x16 product.  This must be a define_expand, not a
+;; define_insn: its operands carry no constraints and the trailing string
+;; is a C preparation statement, not an assembler template.  The
+;; "*umulv8hi3_highpart" define_insn that follows is the pattern that is
+;; expected to match the RTL generated here.
+(define_expand "umulv8hi3_highpart"
+  [(set (match_operand:V8HI 0 "register_operand" "")
+	(truncate:V8HI
+	  (lshiftrt:V8SI
+	    (mult:V8SI
+	      (zero_extend:V8SI
+		(match_operand:V8HI 1 "nonimmediate_operand" ""))
+	      (zero_extend:V8SI
+		(match_operand:V8HI 2 "nonimmediate_operand" "")))
+	    (const_int 16))))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*umulv8hi3_highpart"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(truncate:V8HI
(lshiftrt:V8SI
@@ -2792,6 +2818,122 @@
DONE;
})
+;; Unsigned widening multiply, V8HI x V8HI -> V4SI, for the higher-indexed
+;; four element pairs.  Built from a V8HI multiply, the unsigned high-part
+;; multiply and a high interleave of the two partial results.
+(define_expand "vec_widen_umult_hi_v8hi"
+  [(match_operand:V4SI 0 "register_operand" "")
+   (match_operand:V8HI 1 "register_operand" "")
+   (match_operand:V8HI 2 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  /* Temporaries for the two 16-bit halves of every product.  */
+  rtx prod_lo = gen_reg_rtx (V8HImode);
+  rtx prod_hi = gen_reg_rtx (V8HImode);
+  /* The interleave writes V8HI, so view the V4SI destination in that mode.  */
+  rtx result = gen_lowpart (V8HImode, operands[0]);
+
+  /* mulv8hi3 presumably yields the low 16 bits of each product; the
+     highpart pattern yields the upper 16 bits of the unsigned product.  */
+  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
+  emit_insn (gen_umulv8hi3_highpart (prod_hi, operands[1], operands[2]));
+
+  /* Alternate elements 4..7 of the low and high partial products.  */
+  emit_insn (gen_vec_interleave_highv8hi (result, prod_lo, prod_hi));
+  DONE;
+})
+
+;; Unsigned widening multiply, V8HI x V8HI -> V4SI, for the lower-indexed
+;; four element pairs.  Built from a V8HI multiply, the unsigned high-part
+;; multiply and a low interleave of the two partial results.
+(define_expand "vec_widen_umult_lo_v8hi"
+  [(match_operand:V4SI 0 "register_operand" "")
+   (match_operand:V8HI 1 "register_operand" "")
+   (match_operand:V8HI 2 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  /* Temporaries for the two 16-bit halves of every product.  */
+  rtx prod_lo = gen_reg_rtx (V8HImode);
+  rtx prod_hi = gen_reg_rtx (V8HImode);
+  /* The interleave writes V8HI, so view the V4SI destination in that mode.  */
+  rtx result = gen_lowpart (V8HImode, operands[0]);
+
+  /* mulv8hi3 presumably yields the low 16 bits of each product; the
+     highpart pattern yields the upper 16 bits of the unsigned product.  */
+  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
+  emit_insn (gen_umulv8hi3_highpart (prod_hi, operands[1], operands[2]));
+
+  /* Alternate elements 0..3 of the low and high partial products.  */
+  emit_insn (gen_vec_interleave_lowv8hi (result, prod_lo, prod_hi));
+  DONE;
+})
+
+;; Widening multiply expander vec_widen_smult_hi_v4si: V4SI operands 1 and 2
+;; produce a V2DI result in operand 0.  Each input is interleaved with
+;; itself (high variant, i.e. elements 2 and 3 duplicated) and the two
+;; temporaries are then multiplied.
+(define_expand "vec_widen_smult_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ /* Self-interleave: t1/t2 hold elements 2,2,3,3 of the respective input. */
+ emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
+ /* NOTE(review): this signed expander finishes with
+ gen_sse2_umulv2siv2di3, exactly as vec_widen_umult_hi_v4si does; the
+ generator's name suggests an unsigned multiply. Confirm that negative
+ inputs produce correctly sign-extended products. */
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+
+;; Widening multiply expander vec_widen_smult_lo_v4si: V4SI operands 1 and 2
+;; produce a V2DI result in operand 0.  Each input is interleaved with
+;; itself (low variant, i.e. elements 0 and 1 duplicated) and the two
+;; temporaries are then multiplied.
+(define_expand "vec_widen_smult_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ /* Self-interleave: t1/t2 hold elements 0,0,1,1 of the respective input. */
+ emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
+ /* NOTE(review): this signed expander finishes with
+ gen_sse2_umulv2siv2di3, exactly as vec_widen_umult_lo_v4si does; the
+ generator's name suggests an unsigned multiply. Confirm that negative
+ inputs produce correctly sign-extended products. */
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+
+;; Unsigned widening multiply, V4SI x V4SI -> V2DI, for elements 2 and 3.
+;; Each input is interleaved with itself so that those elements are
+;; duplicated, then the duplicated vectors go through sse2_umulv2siv2di3.
+(define_expand "vec_widen_umult_hi_v4si"
+  [(match_operand:V2DI 0 "register_operand" "")
+   (match_operand:V4SI 1 "register_operand" "")
+   (match_operand:V4SI 2 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  rtx dup_a = gen_reg_rtx (V4SImode);
+  rtx dup_b = gen_reg_rtx (V4SImode);
+
+  /* Interleaving a vector with itself gives elements 2,2,3,3.  */
+  emit_insn (gen_vec_interleave_highv4si (dup_a, operands[1], operands[1]));
+  emit_insn (gen_vec_interleave_highv4si (dup_b, operands[2], operands[2]));
+
+  /* Multiply the rearranged inputs straight into the V2DI destination.  */
+  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup_a, dup_b));
+  DONE;
+})
+
+;; Unsigned widening multiply, V4SI x V4SI -> V2DI, for elements 0 and 1.
+;; Each input is interleaved with itself so that those elements are
+;; duplicated, then the duplicated vectors go through sse2_umulv2siv2di3.
+(define_expand "vec_widen_umult_lo_v4si"
+  [(match_operand:V2DI 0 "register_operand" "")
+   (match_operand:V4SI 1 "register_operand" "")
+   (match_operand:V4SI 2 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  rtx dup_a = gen_reg_rtx (V4SImode);
+  rtx dup_b = gen_reg_rtx (V4SImode);
+
+  /* Interleaving a vector with itself gives elements 0,0,1,1.  */
+  emit_insn (gen_vec_interleave_lowv4si (dup_a, operands[1], operands[1]));
+  emit_insn (gen_vec_interleave_lowv4si (dup_b, operands[2], operands[2]));
+
+  /* Multiply the rearranged inputs straight into the V2DI destination.  */
+  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup_a, dup_b));
+  DONE;
+})
+
(define_expand "sdot_prodv8hi"
[(match_operand:V4SI 0 "register_operand" "")
(match_operand:V8HI 1 "nonimmediate_operand" "")
@@ -3215,6 +3357,227 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Reduce:
+;; op1 = abcdefghijklmnop
+;; op2 = qrstuvwxyz012345
+;; h1 = aqbrcsdteufvgwhx
+;; l1 = iyjzk0l1m2n3o4p5
+;; h2 = aiqybjrzcks0dlt1
+;; l2 = emu2fnv3gow4hpx5
+;; h3 = aeimquy2bfjnrvz3
+;; l3 = cgkosw04dhlptx15
+;; result = bdfhjlnprtvxz135
+(define_expand "vec_pack_mod_v8hi"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, h1, l1, h2, l2, h3, l3;
+
+ /* View both V8HI inputs as V16QI so the byte interleaves can be used. */
+ op1 = gen_lowpart (V16QImode, operands[1]);
+ op2 = gen_lowpart (V16QImode, operands[2]);
+ h1 = gen_reg_rtx (V16QImode);
+ l1 = gen_reg_rtx (V16QImode);
+ h2 = gen_reg_rtx (V16QImode);
+ l2 = gen_reg_rtx (V16QImode);
+ h3 = gen_reg_rtx (V16QImode);
+ l3 = gen_reg_rtx (V16QImode);
+
+ /* Three rounds of high/low interleaves; note that rounds two and three
+ pass the low result first and the high result second. */
+ emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
+ emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
+ emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
+ emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
+ emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
+ emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
+ /* A final low interleave of the last pair produces the packed result. */
+ emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
+ DONE;
+})
+
+;; Reduce:
+;; op1 = abcdefgh
+;; op2 = ijklmnop
+;; h1 = aibjckdl
+;; l1 = emfngohp
+;; h2 = aeimbfjn
+;; l2 = cgkodhlp
+;; result = bdfhjlnp
+(define_expand "vec_pack_mod_v4si"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, h1, l1, h2, l2;
+
+ /* View both V4SI inputs as V8HI so the halfword interleaves can be used. */
+ op1 = gen_lowpart (V8HImode, operands[1]);
+ op2 = gen_lowpart (V8HImode, operands[2]);
+ h1 = gen_reg_rtx (V8HImode);
+ l1 = gen_reg_rtx (V8HImode);
+ h2 = gen_reg_rtx (V8HImode);
+ l2 = gen_reg_rtx (V8HImode);
+
+ /* Two rounds of high/low interleaves; note that round two passes the
+ low result first and the high result second. */
+ emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
+ emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
+ emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
+ emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
+ /* A final low interleave of the last pair produces the packed result. */
+ emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
+ DONE;
+})
+
+;; Reduce:
+;; op1 = abcd
+;; op2 = efgh
+;; h1 = aebf
+;; l1 = cgdh
+;; result = bdfh
+(define_expand "vec_pack_mod_v2di"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, h1, l1;
+
+ /* View both V2DI inputs as V4SI so the word interleaves can be used. */
+ op1 = gen_lowpart (V4SImode, operands[1]);
+ op2 = gen_lowpart (V4SImode, operands[2]);
+ h1 = gen_reg_rtx (V4SImode);
+ l1 = gen_reg_rtx (V4SImode);
+
+ /* One round of high/low interleaves of the two inputs. */
+ emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
+ emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
+ /* A final low interleave (low result first) produces the packed result. */
+ emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
+ DONE;
+})
+
+;; Interleave the higher-indexed halves of two V16QI vectors: the result
+;; alternates elements 8..15 of operand 1 with elements 8..15 of operand 2
+;; (indices 24..31 of the V32QI concatenation).  The body emits
+;; sse2_punpckhbw and finishes with DONE.
+(define_expand "vec_interleave_highv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; Interleave the lower-indexed halves of two V16QI vectors: the result
+;; alternates elements 0..7 of operand 1 with elements 0..7 of operand 2
+;; (indices 16..23 of the V32QI concatenation).  The body emits
+;; sse2_punpcklbw and finishes with DONE.
+(define_expand "vec_interleave_lowv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; Interleave the higher-indexed halves of two V8HI vectors: the result
+;; alternates elements 4..7 of operand 1 with elements 4..7 of operand 2
+;; (indices 12..15 of the V16HI concatenation).  The body emits
+;; sse2_punpckhwd and finishes with DONE.
+(define_expand "vec_interleave_highv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; Interleave the lower-indexed halves of two V8HI vectors: the result
+;; alternates elements 0..3 of operand 1 with elements 0..3 of operand 2
+;; (indices 8..11 of the V16HI concatenation).  The body emits
+;; sse2_punpcklwd and finishes with DONE.
+(define_expand "vec_interleave_lowv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; Interleave the higher-indexed halves of two V4SI vectors: the result
+;; alternates elements 2..3 of operand 1 with elements 2..3 of operand 2
+;; (indices 6..7 of the V8SI concatenation).  The body emits
+;; sse2_punpckhdq and finishes with DONE.
+(define_expand "vec_interleave_highv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; Interleave the lower-indexed halves of two V4SI vectors: the result
+;; alternates elements 0..1 of operand 1 with elements 0..1 of operand 2
+;; (indices 4..5 of the V8SI concatenation).  The body emits
+;; sse2_punpckldq and finishes with DONE.
+(define_expand "vec_interleave_lowv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; Combine the higher-indexed elements of two V2DI vectors: the result is
+;; element 1 of operand 1 followed by element 1 of operand 2 (index 3 of
+;; the V4DI concatenation).  The body emits sse2_punpckhqdq and finishes
+;; with DONE.
+(define_expand "vec_interleave_highv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; Combine the lower-indexed elements of two V2DI vectors: the result is
+;; element 0 of operand 1 followed by element 0 of operand 2 (index 2 of
+;; the V4DI concatenation).  The body emits sse2_punpcklqdq and finishes
+;; with DONE.
+(define_expand "vec_interleave_lowv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
(define_insn "sse2_packsswb"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_concat:V16QI
@@ -3832,6 +4195,114 @@
DONE;
})
+;; Expand vec_unpacku_hi_v16qi (V16QI operand 1 -> V8HI operand 0) under
+;; TARGET_SSE2 by calling ix86_expand_sse_unpack (operands, true, true).
+;; NOTE(review): the flags are presumably (unsigned_p, high_p), judging by
+;; how the sibling vec_unpack{u,s}_{hi,lo} expanders vary them -- confirm
+;; against the helper's definition.
+(define_expand "vec_unpacku_hi_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, true, true);
+ DONE;
+})
+
+;; Expand vec_unpacks_hi_v16qi (V16QI operand 1 -> V8HI operand 0) under
+;; TARGET_SSE2 by calling ix86_expand_sse_unpack (operands, false, true).
+;; NOTE(review): the flags are presumably (unsigned_p, high_p), judging by
+;; how the sibling vec_unpack{u,s}_{hi,lo} expanders vary them -- confirm
+;; against the helper's definition.
+(define_expand "vec_unpacks_hi_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, false, true);
+ DONE;
+})
+
+;; Expand vec_unpacku_lo_v16qi (V16QI operand 1 -> V8HI operand 0) under
+;; TARGET_SSE2 by calling ix86_expand_sse_unpack (operands, true, false).
+;; NOTE(review): the flags are presumably (unsigned_p, high_p), judging by
+;; how the sibling vec_unpack{u,s}_{hi,lo} expanders vary them -- confirm
+;; against the helper's definition.
+(define_expand "vec_unpacku_lo_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, true, false);
+ DONE;
+})
+
+;; Expand vec_unpacks_lo_v16qi (V16QI operand 1 -> V8HI operand 0) under
+;; TARGET_SSE2 by calling ix86_expand_sse_unpack (operands, false, false).
+;; NOTE(review): the flags are presumably (unsigned_p, high_p), judging by
+;; how the sibling vec_unpack{u,s}_{hi,lo} expanders vary them -- confirm
+;; against the helper's definition.
+(define_expand "vec_unpacks_lo_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, false, false);
+ DONE;
+})
+
+;; Expand vec_unpacku_hi_v8hi (V8HI operand 1 -> V4SI operand 0) under
+;; TARGET_SSE2 by calling ix86_expand_sse_unpack (operands, true, true).
+;; NOTE(review): the flags are presumably (unsigned_p, high_p), judging by
+;; how the sibling vec_unpack{u,s}_{hi,lo} expanders vary them -- confirm
+;; against the helper's definition.
+(define_expand "vec_unpacku_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, true, true);
+ DONE;
+})
+
+;; Expand vec_unpacks_hi_v8hi (V8HI operand 1 -> V4SI operand 0) under
+;; TARGET_SSE2 by calling ix86_expand_sse_unpack (operands, false, true).
+;; NOTE(review): the flags are presumably (unsigned_p, high_p), judging by
+;; how the sibling vec_unpack{u,s}_{hi,lo} expanders vary them -- confirm
+;; against the helper's definition.
+(define_expand "vec_unpacks_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, false, true);
+ DONE;
+})
+
+;; Expand vec_unpacku_lo_v8hi (V8HI operand 1 -> V4SI operand 0) under
+;; TARGET_SSE2 by calling ix86_expand_sse_unpack (operands, true, false).
+;; NOTE(review): the flags are presumably (unsigned_p, high_p), judging by
+;; how the sibling vec_unpack{u,s}_{hi,lo} expanders vary them -- confirm
+;; against the helper's definition.
+(define_expand "vec_unpacku_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, true, false);
+ DONE;
+})
+
+;; Expand vec_unpacks_lo_v8hi (V8HI operand 1 -> V4SI operand 0) under
+;; TARGET_SSE2 by calling ix86_expand_sse_unpack (operands, false, false).
+;; NOTE(review): the flags are presumably (unsigned_p, high_p), judging by
+;; how the sibling vec_unpack{u,s}_{hi,lo} expanders vary them -- confirm
+;; against the helper's definition.
+(define_expand "vec_unpacks_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, false, false);
+ DONE;
+})
+
+;; Expand vec_unpacku_hi_v4si (V4SI operand 1 -> V2DI operand 0) under
+;; TARGET_SSE2 by calling ix86_expand_sse_unpack (operands, true, true).
+;; NOTE(review): the flags are presumably (unsigned_p, high_p), judging by
+;; how the sibling vec_unpack{u,s}_{hi,lo} expanders vary them -- confirm
+;; against the helper's definition.
+(define_expand "vec_unpacku_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, true, true);
+ DONE;
+})
+
+;; Expand vec_unpacks_hi_v4si (V4SI operand 1 -> V2DI operand 0) under
+;; TARGET_SSE2 by calling ix86_expand_sse_unpack (operands, false, true).
+;; NOTE(review): the flags are presumably (unsigned_p, high_p), judging by
+;; how the sibling vec_unpack{u,s}_{hi,lo} expanders vary them -- confirm
+;; against the helper's definition.
+(define_expand "vec_unpacks_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, false, true);
+ DONE;
+})
+
+;; Expand vec_unpacku_lo_v4si (V4SI operand 1 -> V2DI operand 0) under
+;; TARGET_SSE2 by calling ix86_expand_sse_unpack (operands, true, false).
+;; NOTE(review): the flags are presumably (unsigned_p, high_p), judging by
+;; how the sibling vec_unpack{u,s}_{hi,lo} expanders vary them -- confirm
+;; against the helper's definition.
+(define_expand "vec_unpacku_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, true, false);
+ DONE;
+})
+
+;; Expand vec_unpacks_lo_v4si (V4SI operand 1 -> V2DI operand 0) under
+;; TARGET_SSE2 by calling ix86_expand_sse_unpack (operands, false, false).
+;; NOTE(review): the flags are presumably (unsigned_p, high_p), judging by
+;; how the sibling vec_unpack{u,s}_{hi,lo} expanders vary them -- confirm
+;; against the helper's definition.
+(define_expand "vec_unpacks_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, false, false);
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Miscellaneous