aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: H.J. Lu <hongjiu.lu@intel.com> 2007-05-26 14:34:21 +0000
committer: H.J. Lu <hjl@gcc.gnu.org> 2007-05-26 07:34:21 -0700
commit: e5ac0b9bceadbaf799f72e80c84902e2dafb44f9 (patch)
tree: b4417bea9cf60754231d7ed67f88717dd9f67f86
parent: a0cb58b2cdeccc118360d4f0a9b01e51c0cfe05c (diff)
download: gcc-e5ac0b9bceadbaf799f72e80c84902e2dafb44f9.zip
gcc-e5ac0b9bceadbaf799f72e80c84902e2dafb44f9.tar.gz
gcc-e5ac0b9bceadbaf799f72e80c84902e2dafb44f9.tar.bz2
i386-protos.h (ix86_expand_sse4_unpack): New.
2007-05-26 H.J. Lu <hongjiu.lu@intel.com>

 * config/i386/i386-protos.h (ix86_expand_sse4_unpack): New.

 * config/i386/i386.c (ix86_expand_sse4_unpack): New.

 * config/i386/sse.md (vec_unpacku_hi_v16qi): Call
 ix86_expand_sse4_unpack if SSE4.1 is enabled.
 (vec_unpacks_hi_v16qi): Likewise.
 (vec_unpacku_lo_v16qi): Likewise.
 (vec_unpacks_lo_v16qi): Likewise.
 (vec_unpacku_hi_v8hi): Likewise.
 (vec_unpacks_hi_v8hi): Likewise.
 (vec_unpacku_lo_v8hi): Likewise.
 (vec_unpacks_lo_v8hi): Likewise.
 (vec_unpacku_hi_v4si): Likewise.
 (vec_unpacks_hi_v4si): Likewise.
 (vec_unpacku_lo_v4si): Likewise.
 (vec_unpacks_lo_v4si): Likewise.

From-SVN: r125093
-rw-r--r-- gcc/ChangeLog 20
-rw-r--r-- gcc/config/i386/i386-protos.h 1
-rw-r--r-- gcc/config/i386/i386.c 49
-rw-r--r-- gcc/config/i386/sse.md 60
4 files changed, 118 insertions, 12 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 85ddc22..cdd0b84 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2007-05-26 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/i386-protos.h (ix86_expand_sse4_unpack): New.
+
+ * config/i386/i386.c (ix86_expand_sse4_unpack): New.
+
+ * config/i386/sse.md (vec_unpacku_hi_v16qi): Call
+ ix86_expand_sse4_unpack if SSE4.1 is enabled.
+ (vec_unpacks_hi_v16qi): Likewise.
+ (vec_unpacku_lo_v16qi): Likewise.
+ (vec_unpacks_lo_v16qi): Likewise.
+ (vec_unpacku_hi_v8hi): Likewise.
+ (vec_unpacks_hi_v8hi): Likewise.
+ (vec_unpacku_lo_v8hi): Likewise.
+ (vec_unpacks_lo_v8hi): Likewise.
+ (vec_unpacku_hi_v4si): Likewise.
+ (vec_unpacks_hi_v4si): Likewise.
+ (vec_unpacku_lo_v4si): Likewise.
+ (vec_unpacks_lo_v4si): Likewise.
+
2007-05-26 Kazu Hirata <kazu@codesourcery.com>
* c-typeck.c, config/arm/arm.c, config/darwin.c,
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 2f32039..60b4955 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -112,6 +112,7 @@ extern int ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[]);
extern void ix86_expand_sse_unpack (rtx[], bool, bool);
+extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
extern int ix86_expand_int_addcc (rtx[]);
extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
extern void x86_initialize_trampoline (rtx, rtx, rtx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7bc5fe0..b0db950 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12843,6 +12843,55 @@ ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
emit_insn (unpack (dest, operands[1], se));
}
+/* This function performs the same task as ix86_expand_sse_unpack,
+ but with SSE4.1 instructions.
+
+ OPERANDS[0] is used as the destination and OPERANDS[1] as the source
+ vector. The mode of OPERANDS[1] (V16QImode, V8HImode or V4SImode)
+ selects the insn generator; any other mode hits gcc_unreachable.
+ UNSIGNED_P selects the gen_sse4_1_zero_extend* generator instead of
+ the gen_sse4_1_extend* one. When HIGH_P is set, the source is first
+ shifted right by 64 bits into a fresh register and that register is
+ passed to the generator; otherwise OPERANDS[1] is passed as is. */
+
+void
+ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+ enum machine_mode imode = GET_MODE (operands[1]);
+ rtx (*unpack)(rtx, rtx); /* Generator chosen by the switch below. */
+ rtx src, dest;
+
+ switch (imode) /* Choose by source mode, then by UNSIGNED_P. */
+ {
+ case V16QImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+ else
+ unpack = gen_sse4_1_extendv8qiv8hi2;
+ break;
+ case V8HImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv4hiv4si2;
+ else
+ unpack = gen_sse4_1_extendv4hiv4si2;
+ break;
+ case V4SImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv2siv2di2;
+ else
+ unpack = gen_sse4_1_extendv2siv2di2;
+ break;
+ default:
+ gcc_unreachable (); /* Only the three modes above are handled. */
+ }
+
+ dest = operands[0];
+ if (high_p)
+ {
+ /* Shift higher 8 bytes to lower 8 bytes. The shift is emitted on
+ TImode views of a fresh IMODE register and of operands[1], with
+ a count of 64. */
+ src = gen_reg_rtx (imode);
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
+ gen_lowpart (TImode, operands[1]),
+ GEN_INT (64)));
+ }
+ else
+ src = operands[1]; /* Low half: use the source unchanged. */
+
+ emit_insn (unpack (dest, src)); /* Emit the selected extension insn. */
+}
+
/* Expand conditional increment or decrement using adb/sbb instructions.
The default case using setcc followed by the conditional move can be
done by generic code. */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 800807c..81ff925 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4722,7 +4722,10 @@
(match_operand:V16QI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, true, true);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
DONE;
})
@@ -4731,7 +4734,10 @@
(match_operand:V16QI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, false, true);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
DONE;
})
@@ -4740,7 +4746,10 @@
(match_operand:V16QI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, true, false);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
DONE;
})
@@ -4749,7 +4758,10 @@
(match_operand:V16QI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, false, false);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
DONE;
})
@@ -4758,7 +4770,10 @@
(match_operand:V8HI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, true, true);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
DONE;
})
@@ -4767,7 +4782,10 @@
(match_operand:V8HI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, false, true);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
DONE;
})
@@ -4776,7 +4794,10 @@
(match_operand:V8HI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, true, false);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
DONE;
})
@@ -4785,7 +4806,10 @@
(match_operand:V8HI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, false, false);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
DONE;
})
@@ -4794,7 +4818,10 @@
(match_operand:V4SI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, true, true);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
DONE;
})
@@ -4803,7 +4830,10 @@
(match_operand:V4SI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, false, true);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
DONE;
})
@@ -4812,7 +4842,10 @@
(match_operand:V4SI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, true, false);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
DONE;
})
@@ -4821,7 +4854,10 @@
(match_operand:V4SI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, false, false);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
DONE;
})