aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hongjiu.lu@intel.com>2019-05-15 15:05:07 +0000
committerH.J. Lu <hjl@gcc.gnu.org>2019-05-15 08:05:07 -0700
commit6e9fffcf83638a50cac6e2a127817c409238cfde (patch)
tree1175217033e97657fd12bbfa1a2bea9f9704fcce
parentb74ebb2a36adbb18da52f7eb25b54655b04c7be4 (diff)
downloadgcc-6e9fffcf83638a50cac6e2a127817c409238cfde.zip
gcc-6e9fffcf83638a50cac6e2a127817c409238cfde.tar.gz
gcc-6e9fffcf83638a50cac6e2a127817c409238cfde.tar.bz2
i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX. For MMX punpckhXX, move bits 64:127 to bits 0:63 in SSE register. Only SSE register source operand is allowed. PR target/89021 * config/i386/i386-expand.c (ix86_split_mmx_punpck): New function. * config/i386/i386-protos.h (ix86_split_mmx_punpck): New prototype. * config/i386/mmx.m (mmx_punpckhbw): Changed to define_insn_and_split to support SSE emulation. (mmx_punpcklbw): Likewise. (mmx_punpckhwd): Likewise. (mmx_punpcklwd): Likewise. (mmx_punpckhdq): Likewise. (mmx_punpckldq): Likewise. From-SVN: r271216
-rw-r--r--gcc/ChangeLog14
-rw-r--r--gcc/config/i386/i386-expand.c77
-rw-r--r--gcc/config/i386/i386-protos.h1
-rw-r--r--gcc/config/i386/mmx.md138
4 files changed, 182 insertions, 48 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 01783a9..3e5e2d2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,4 +1,18 @@
2019-05-15 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/89021
+ * config/i386/i386-expand.c (ix86_split_mmx_punpck): New function.
+ * config/i386/i386-protos.h (ix86_split_mmx_punpck): New
+ prototype.
+ * config/i386/mmx.m (mmx_punpckhbw): Changed to
+ define_insn_and_split to support SSE emulation.
+ (mmx_punpcklbw): Likewise.
+ (mmx_punpckhwd): Likewise.
+ (mmx_punpcklwd): Likewise.
+ (mmx_punpckhdq): Likewise.
+ (mmx_punpckldq): Likewise.
+
+2019-05-15 H.J. Lu <hongjiu.lu@intel.com>
Uros Bizjak <ubizjak@gmail.com>
PR target/89021
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index f1e0593..81300f6 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -716,6 +716,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
ix86_move_vector_high_sse_to_mmx (op0);
}
+/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX. */
+
+void
+ix86_split_mmx_punpck (rtx operands[], bool high_p)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ machine_mode mode = GET_MODE (op0);
+ rtx mask;
+ /* The corresponding SSE mode. */
+ machine_mode sse_mode, double_sse_mode;
+
+ switch (mode)
+ {
+ case E_V8QImode:
+ sse_mode = V16QImode;
+ double_sse_mode = V32QImode;
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (16,
+ GEN_INT (0), GEN_INT (16),
+ GEN_INT (1), GEN_INT (17),
+ GEN_INT (2), GEN_INT (18),
+ GEN_INT (3), GEN_INT (19),
+ GEN_INT (4), GEN_INT (20),
+ GEN_INT (5), GEN_INT (21),
+ GEN_INT (6), GEN_INT (22),
+ GEN_INT (7), GEN_INT (23)));
+ break;
+
+ case E_V4HImode:
+ sse_mode = V8HImode;
+ double_sse_mode = V16HImode;
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (8,
+ GEN_INT (0), GEN_INT (8),
+ GEN_INT (1), GEN_INT (9),
+ GEN_INT (2), GEN_INT (10),
+ GEN_INT (3), GEN_INT (11)));
+ break;
+
+ case E_V2SImode:
+ sse_mode = V4SImode;
+ double_sse_mode = V8SImode;
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4,
+ GEN_INT (0), GEN_INT (4),
+ GEN_INT (1), GEN_INT (5)));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Generate SSE punpcklXX. */
+ rtx dest = lowpart_subreg (sse_mode, op0, GET_MODE (op0));
+ op1 = lowpart_subreg (sse_mode, op1, GET_MODE (op1));
+ op2 = lowpart_subreg (sse_mode, op2, GET_MODE (op2));
+
+ op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2);
+ op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask);
+ rtx insn = gen_rtx_SET (dest, op2);
+ emit_insn (insn);
+
+ if (high_p)
+ {
+ /* Move bits 64:127 to bits 0:63. */
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (0)));
+ dest = lowpart_subreg (V4SImode, dest, GET_MODE (dest));
+ op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+ insn = gen_rtx_SET (dest, op1);
+ emit_insn (insn);
+ }
+}
+
/* Helper function of ix86_fixup_binary_operands to canonicalize
operand order. Returns true if the operands should be swapped. */
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 760f530..b9de1e7 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -202,6 +202,7 @@ extern rtx ix86_split_stack_guard (void);
extern void ix86_move_vector_high_sse_to_mmx (rtx);
extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+extern void ix86_split_mmx_punpck (rtx[], bool);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 28c9aa7..b3fc7f3 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1089,87 +1089,129 @@
(set_attr "type" "mmxshft,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckhbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
- "TARGET_MMX"
- "punpckhbw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckhbw\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, true); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpcklbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
(const_int 3) (const_int 11)])))]
- "TARGET_MMX"
- "punpcklbw\t{%2, %0|%0, %k2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpcklbw\t{%2, %0|%0, %k2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, false); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckhwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
- "TARGET_MMX"
- "punpckhwd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckhwd\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, true); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpcklwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
- "TARGET_MMX"
- "punpcklwd\t{%2, %0|%0, %k2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpcklwd\t{%2, %0|%0, %k2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, false); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckhdq"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhdq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_select:V2SI
(vec_concat:V4SI
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv")
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 1)
(const_int 3)])))]
- "TARGET_MMX"
- "punpckhdq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckhdq\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, true); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckldq"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckldq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_select:V2SI
(vec_concat:V4SI
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv")
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 0)
(const_int 2)])))]
- "TARGET_MMX"
- "punpckldq\t{%2, %0|%0, %k2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckldq\t{%2, %0|%0, %k2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, false); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_pinsrw"
[(set (match_operand:V4HI 0 "register_operand")