diff options
author | Roger Sayle <roger@eyesopen.com> | 2006-04-16 21:46:59 +0000 |
---|---|---|
committer | Roger Sayle <sayle@gcc.gnu.org> | 2006-04-16 21:46:59 +0000 |
commit | 2ff619482da2a22a0099aacfd9e3118e25256aac (patch) | |
tree | b6b0e1ede375a1e186941f57d26fa43ea68e93cf /gcc/config | |
parent | 3c86fb4e17941da9eb4026bda6301bf0a74a96fe (diff) | |
download | gcc-2ff619482da2a22a0099aacfd9e3118e25256aac.zip gcc-2ff619482da2a22a0099aacfd9e3118e25256aac.tar.gz gcc-2ff619482da2a22a0099aacfd9e3118e25256aac.tar.bz2 |
re PR target/24076 ((vector char){x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x} code gen is not that good)
2006-04-15 Roger Sayle <roger@eyesopen.com>
Andrew Pinski <pinskia@gcc.gnu.org>
Dale Johannesen <dalej@apple.com>
PR target/24076
* config/i386/i386.c (ix86_expand_vector_init_duplicate): Add
special case code to implement V8HImode and V16QImode with SSE2.
* gcc.target/i386/vecinit-3.c: New testcase.
* gcc.target/i386/vecinit-4.c: Likewise.
* gcc.target/i386/sse-18.c: Likewise.
* gcc.target/i386/sse-19.c: Likewise.
Co-Authored-By: Andrew Pinski <pinskia@gcc.gnu.org>
Co-Authored-By: Dale Johannesen <dalej@apple.com>
From-SVN: r112990
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/i386.c | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 4d3a972..3fc19bf 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -17856,11 +17856,66 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, wvmode = V4HImode; goto widen; case V8HImode: + if (TARGET_SSE2) + { + rtx tmp1, tmp2; + /* Extend HImode to SImode using a paradoxical SUBREG. */ + tmp1 = gen_reg_rtx (SImode); + emit_move_insn (tmp1, gen_lowpart (SImode, val)); + /* Insert the SImode value as low element of V4SImode vector. */ + tmp2 = gen_reg_rtx (V4SImode); + tmp1 = gen_rtx_VEC_MERGE (V4SImode, + gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), + CONST0_RTX (V4SImode), + const1_rtx); + emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); + /* Cast the V4SImode vector back to a V8HImode vector. */ + tmp1 = gen_reg_rtx (V8HImode); + emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2)); + /* Duplicate the low short through the whole low SImode word. */ + emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1)); + /* Cast the V8HImode vector back to a V4SImode vector. */ + tmp2 = gen_reg_rtx (V4SImode); + emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); + /* Replicate the low element of the V4SImode vector. */ + emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); + /* Cast the V4SImode back to V8HImode, and store in target. */ + emit_move_insn (target, gen_lowpart (V8HImode, tmp2)); + return true; + } smode = HImode; wsmode = SImode; wvmode = V4SImode; goto widen; case V16QImode: + if (TARGET_SSE2) + { + rtx tmp1, tmp2; + /* Extend QImode to SImode using a paradoxical SUBREG. */ + tmp1 = gen_reg_rtx (SImode); + emit_move_insn (tmp1, gen_lowpart (SImode, val)); + /* Insert the SImode value as low element of V4SImode vector. 
*/ + tmp2 = gen_reg_rtx (V4SImode); + tmp1 = gen_rtx_VEC_MERGE (V4SImode, + gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), + CONST0_RTX (V4SImode), + const1_rtx); + emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); + /* Cast the V4SImode vector back to a V16QImode vector. */ + tmp1 = gen_reg_rtx (V16QImode); + emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2)); + /* Duplicate the low byte through the whole low SImode word. */ + emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); + emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); + /* Cast the V16QImode vector back to a V4SImode vector. */ + tmp2 = gen_reg_rtx (V4SImode); + emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); + /* Replicate the low element of the V4SImode vector. */ + emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); + /* Cast the V4SImode back to V16QImode, and store in target. */ + emit_move_insn (target, gen_lowpart (V16QImode, tmp2)); + return true; + } smode = QImode; wsmode = HImode; wvmode = V8HImode; |