diff options
author | H.J. Lu <hongjiu.lu@intel.com> | 2007-05-26 14:34:21 +0000 |
---|---|---|
committer | H.J. Lu <hjl@gcc.gnu.org> | 2007-05-26 07:34:21 -0700 |
commit | e5ac0b9bceadbaf799f72e80c84902e2dafb44f9 (patch) | |
tree | b4417bea9cf60754231d7ed67f88717dd9f67f86 | |
parent | a0cb58b2cdeccc118360d4f0a9b01e51c0cfe05c (diff) | |
download | gcc-e5ac0b9bceadbaf799f72e80c84902e2dafb44f9.zip gcc-e5ac0b9bceadbaf799f72e80c84902e2dafb44f9.tar.gz gcc-e5ac0b9bceadbaf799f72e80c84902e2dafb44f9.tar.bz2 |
i386-protos.h (ix86_expand_sse4_unpack): New.
2007-05-26 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386-protos.h (ix86_expand_sse4_unpack): New.
* config/i386/i386.c (ix86_expand_sse4_unpack): New.
* config/i386/sse.md (vec_unpacku_hi_v16qi): Call
ix86_expand_sse4_unpack if SSE4.1 is enabled.
(vec_unpacks_hi_v16qi): Likewise.
(vec_unpacku_lo_v16qi): Likewise.
(vec_unpacks_lo_v16qi): Likewise.
(vec_unpacku_hi_v8hi): Likewise.
(vec_unpacks_hi_v8hi): Likewise.
(vec_unpacku_lo_v8hi): Likewise.
(vec_unpacks_lo_v8hi): Likewise.
(vec_unpacku_hi_v4si): Likewise.
(vec_unpacks_hi_v4si): Likewise.
(vec_unpacku_lo_v4si): Likewise.
(vec_unpacks_lo_v4si): Likewise.
From-SVN: r125093
-rw-r--r-- | gcc/ChangeLog | 20 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 49 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 60 |
4 files changed, 118 insertions, 12 deletions
```diff
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 85ddc22..cdd0b84 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2007-05-26  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* config/i386/i386-protos.h (ix86_expand_sse4_unpack): New.
+
+	* config/i386/i386.c (ix86_expand_sse4_unpack): New.
+
+	* config/i386/sse.md (vec_unpacku_hi_v16qi): Call
+	ix86_expand_sse4_unpack if SSE4.1 is enabled.
+	(vec_unpacks_hi_v16qi): Likewise.
+	(vec_unpacku_lo_v16qi): Likewise.
+	(vec_unpacks_lo_v16qi): Likewise.
+	(vec_unpacku_hi_v8hi): Likewise.
+	(vec_unpacks_hi_v8hi): Likewise.
+	(vec_unpacku_lo_v8hi): Likewise.
+	(vec_unpacks_lo_v8hi): Likewise.
+	(vec_unpacku_hi_v4si): Likewise.
+	(vec_unpacks_hi_v4si): Likewise.
+	(vec_unpacku_lo_v4si): Likewise.
+	(vec_unpacks_lo_v4si): Likewise.
+
 2007-05-26  Kazu Hirata  <kazu@codesourcery.com>
 
 	* c-typeck.c, config/arm/arm.c, config/darwin.c,
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 2f32039..60b4955 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -112,6 +112,7 @@ extern int ix86_expand_fp_movcc (rtx[]);
 extern bool ix86_expand_fp_vcond (rtx[]);
 extern bool ix86_expand_int_vcond (rtx[]);
 extern void ix86_expand_sse_unpack (rtx[], bool, bool);
+extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
 extern int ix86_expand_int_addcc (rtx[]);
 extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
 extern void x86_initialize_trampoline (rtx, rtx, rtx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7bc5fe0..b0db950 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12843,6 +12843,55 @@ ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
   emit_insn (unpack (dest, operands[1], se));
 }
 
+/* This function performs the same task as ix86_expand_sse_unpack,
+   but with SSE4.1 instructions.  */
+
+void
+ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+  enum machine_mode imode = GET_MODE (operands[1]);
+  rtx (*unpack)(rtx, rtx);
+  rtx src, dest;
+
+  switch (imode)
+    {
+    case V16QImode:
+      if (unsigned_p)
+        unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+      else
+        unpack = gen_sse4_1_extendv8qiv8hi2;
+      break;
+    case V8HImode:
+      if (unsigned_p)
+        unpack = gen_sse4_1_zero_extendv4hiv4si2;
+      else
+        unpack = gen_sse4_1_extendv4hiv4si2;
+      break;
+    case V4SImode:
+      if (unsigned_p)
+        unpack = gen_sse4_1_zero_extendv2siv2di2;
+      else
+        unpack = gen_sse4_1_extendv2siv2di2;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  dest = operands[0];
+  if (high_p)
+    {
+      /* Shift higher 8 bytes to lower 8 bytes.  */
+      src = gen_reg_rtx (imode);
+      emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
+                                   gen_lowpart (TImode, operands[1]),
+                                   GEN_INT (64)));
+    }
+  else
+    src = operands[1];
+
+  emit_insn (unpack (dest, src));
+}
+
 /* Expand conditional increment or decrement using adb/sbb instructions.
    The default case using setcc followed by the conditional move can be
    done by generic code.  */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 800807c..81ff925 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4722,7 +4722,10 @@
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, true);
+  else
+    ix86_expand_sse_unpack (operands, true, true);
   DONE;
 })
 
@@ -4731,7 +4734,10 @@
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, true);
+  else
+    ix86_expand_sse_unpack (operands, false, true);
   DONE;
 })
 
@@ -4740,7 +4746,10 @@
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, false);
+  else
+    ix86_expand_sse_unpack (operands, true, false);
   DONE;
 })
 
@@ -4749,7 +4758,10 @@
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, false);
+  else
+    ix86_expand_sse_unpack (operands, false, false);
   DONE;
 })
 
@@ -4758,7 +4770,10 @@
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, true);
+  else
+    ix86_expand_sse_unpack (operands, true, true);
   DONE;
 })
 
@@ -4767,7 +4782,10 @@
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, true);
+  else
+    ix86_expand_sse_unpack (operands, false, true);
   DONE;
 })
 
@@ -4776,7 +4794,10 @@
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, false);
+  else
+    ix86_expand_sse_unpack (operands, true, false);
   DONE;
 })
 
@@ -4785,7 +4806,10 @@
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, false);
+  else
+    ix86_expand_sse_unpack (operands, false, false);
   DONE;
 })
 
@@ -4794,7 +4818,10 @@
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, true);
+  else
+    ix86_expand_sse_unpack (operands, true, true);
   DONE;
 })
 
@@ -4803,7 +4830,10 @@
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, true);
+  else
+    ix86_expand_sse_unpack (operands, false, true);
   DONE;
 })
 
@@ -4812,7 +4842,10 @@
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, false);
+  else
+    ix86_expand_sse_unpack (operands, true, false);
   DONE;
 })
 
@@ -4821,7 +4854,10 @@
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, false);
+  else
+    ix86_expand_sse_unpack (operands, false, false);
   DONE;
 })
 
```