aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hongjiu.lu@intel.com>2019-05-15 15:04:08 +0000
committerH.J. Lu <hjl@gcc.gnu.org>2019-05-15 08:04:08 -0700
commitb74ebb2a36adbb18da52f7eb25b54655b04c7be4 (patch)
treeee8b7f8ff243eec0be5164ce27e4f599dd60209f
parentdfa61b9ed06d71901c4c430caa89820972ad68fe (diff)
downloadgcc-b74ebb2a36adbb18da52f7eb25b54655b04c7be4.zip
gcc-b74ebb2a36adbb18da52f7eb25b54655b04c7be4.tar.gz
gcc-b74ebb2a36adbb18da52f7eb25b54655b04c7be4.tar.bz2
i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb plus moving bits 64:95 to bits 32:63 in SSE register. Only SSE register source operand is allowed. PR target/89021 * config/i386/i386-expand.c (ix86_move_vector_high_sse_to_mmx): New function. (ix86_split_mmx_pack): Likewise. * config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx): New prototype. (ix86_split_mmx_pack): Likewise. * config/i386/i386.md (mmx_isa): New. (enabled): Also check mmx_isa. * config/i386/mmx.md (any_s_truncate): New code iterator. (s_trunsuffix): New code attr. (mmx_packsswb): Removed. (mmx_packssdw): Likewise. (mmx_packuswb): Likewise. (mmx_pack<s_trunsuffix>swb): New define_insn_and_split to emulate MMX packsswb/packuswb with SSE2. (mmx_packssdw): Likewise. * config/i386/predicates.md (register_mmxmem_operand): New. Co-Authored-By: Uros Bizjak <ubizjak@gmail.com> From-SVN: r271215
-rw-r--r--gcc/ChangeLog22
-rw-r--r--gcc/config/i386/i386-expand.c54
-rw-r--r--gcc/config/i386/i386-protos.h3
-rw-r--r--gcc/config/i386/i386.md13
-rw-r--r--gcc/config/i386/mmx.md67
-rw-r--r--gcc/config/i386/predicates.md7
6 files changed, 136 insertions, 30 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index eb67d11..01783a9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,4 +1,26 @@
2019-05-15 H.J. Lu <hongjiu.lu@intel.com>
+ Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/89021
+ * config/i386/i386-expand.c (ix86_move_vector_high_sse_to_mmx):
+ New function.
+ (ix86_split_mmx_pack): Likewise.
+ * config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx):
+ New prototype.
+ (ix86_split_mmx_pack): Likewise.
+ * config/i386/i386.md (mmx_isa): New.
+ (enabled): Also check mmx_isa.
+ * config/i386/mmx.md (any_s_truncate): New code iterator.
+ (s_trunsuffix): New code attr.
+ (mmx_packsswb): Removed.
+ (mmx_packssdw): Likewise.
+ (mmx_packuswb): Likewise.
+ (mmx_pack<s_trunsuffix>swb): New define_insn_and_split to emulate
+ MMX packsswb/packuswb with SSE2.
+ (mmx_packssdw): Likewise.
+ * config/i386/predicates.md (register_mmxmem_operand): New.
+
+2019-05-15 H.J. Lu <hongjiu.lu@intel.com>
PR target/89021
* config/i386/i386-c.c (ix86_target_macros_internal): Define
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index a55d492..f1e0593 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -662,6 +662,60 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
gcc_unreachable ();
}
+/* Move bits 64:95 to bits 32:63. */
+
+void
+ix86_move_vector_high_sse_to_mmx (rtx op)
+{
+ rtx mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (0), GEN_INT (2),
+ GEN_INT (0), GEN_INT (0)));
+ rtx dest = lowpart_subreg (V4SImode, op, GET_MODE (op));
+ op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+ rtx insn = gen_rtx_SET (dest, op);
+ emit_insn (insn);
+}
+
+/* Split MMX pack with signed/unsigned saturation with SSE/SSE2. */
+
+void
+ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+
+ machine_mode dmode = GET_MODE (op0);
+ machine_mode smode = GET_MODE (op1);
+ machine_mode inner_dmode = GET_MODE_INNER (dmode);
+ machine_mode inner_smode = GET_MODE_INNER (smode);
+
+ /* Get the corresponding SSE mode for destination. */
+ int nunits = 16 / GET_MODE_SIZE (inner_dmode);
+ machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+ nunits).require ();
+ machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+ nunits / 2).require ();
+
+ /* Get the corresponding SSE mode for source. */
+ nunits = 16 / GET_MODE_SIZE (inner_smode);
+ machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode),
+ nunits).require ();
+
+ /* Generate SSE pack with signed/unsigned saturation. */
+ rtx dest = lowpart_subreg (sse_dmode, op0, GET_MODE (op0));
+ op1 = lowpart_subreg (sse_smode, op1, GET_MODE (op1));
+ op2 = lowpart_subreg (sse_smode, op2, GET_MODE (op2));
+
+ op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
+ op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
+ rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
+ op1, op2));
+ emit_insn (insn);
+
+ ix86_move_vector_high_sse_to_mmx (op0);
+}
+
/* Helper function of ix86_fixup_binary_operands to canonicalize
operand order. Returns true if the operands should be swapped. */
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 597af64..760f530 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -200,6 +200,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx);
extern rtx ix86_split_stack_guard (void);
+extern void ix86_move_vector_high_sse_to_mmx (rtx);
+extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
#endif /* TREE_CODE */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 0541122..2ae4bb8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -796,6 +796,10 @@
avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
(const_string "base"))
+;; Define instruction set of MMX instructions
+(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
+ (const_string "base"))
+
(define_attr "enabled" ""
(cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
(eq_attr "isa" "x64_sse2")
@@ -834,6 +838,15 @@
(eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
(eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
(eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
+
+ (eq_attr "mmx_isa" "native")
+ (symbol_ref "!TARGET_MMX_WITH_SSE")
+ (eq_attr "mmx_isa" "x64")
+ (symbol_ref "TARGET_MMX_WITH_SSE")
+ (eq_attr "mmx_isa" "x64_avx")
+ (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
+ (eq_attr "mmx_isa" "x64_noavx")
+ (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
]
(const_int 1)))
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b566cc8..28c9aa7 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1046,41 +1046,48 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "mmx_packsswb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+;; Used in signed and unsigned truncations with saturation.
+(define_code_iterator any_s_truncate [ss_truncate us_truncate])
+;; Instruction suffix for truncations with saturation.
+(define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
+
+(define_insn_and_split "mmx_pack<s_trunsuffix>swb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(vec_concat:V8QI
- (ss_truncate:V4QI
- (match_operand:V4HI 1 "register_operand" "0"))
- (ss_truncate:V4QI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
- "TARGET_MMX"
- "packsswb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
+ (any_s_truncate:V4QI
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv"))
+ (any_s_truncate:V4QI
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ pack<s_trunsuffix>swb\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_pack (operands, <any_s_truncate:CODE>); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_packssdw"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_packssdw"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_concat:V4HI
(ss_truncate:V2HI
- (match_operand:V2SI 1 "register_operand" "0"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv"))
(ss_truncate:V2HI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))]
- "TARGET_MMX"
- "packssdw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
-
-(define_insn "mmx_packuswb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_concat:V8QI
- (us_truncate:V4QI
- (match_operand:V4HI 1 "register_operand" "0"))
- (us_truncate:V4QI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
- "TARGET_MMX"
- "packuswb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ packssdw\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_pack (operands, SS_TRUNCATE); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_punpckhbw"
[(set (match_operand:V8QI 0 "register_operand" "=y")
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 865947d..29867fb 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -49,6 +49,13 @@
(and (match_code "reg")
(match_test "MMX_REGNO_P (REGNO (op))")))
+;; Match register operands, but include memory operands for
+;; !TARGET_MMX_WITH_SSE.
+(define_predicate "register_mmxmem_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (not (match_test "TARGET_MMX_WITH_SSE"))
+ (match_operand 0 "memory_operand"))))
+
;; True if the operand is an SSE register.
(define_predicate "sse_reg_operand"
(and (match_code "reg")