diff options
author | Richard Henderson <rth@redhat.com> | 2007-03-26 17:30:32 -0700 |
---|---|---|
committer | Richard Henderson <rth@gcc.gnu.org> | 2007-03-26 17:30:32 -0700 |
commit | 24bfafbc9d95153f4ad59d6618eae963a83723a0 (patch) | |
tree | 6a311a473b6819715b29447bec1588f0b5b71df9 | |
parent | df12b78f89bbeb779078c5817f62129f9a9e9db6 (diff) | |
download | gcc-24bfafbc9d95153f4ad59d6618eae963a83723a0.zip gcc-24bfafbc9d95153f4ad59d6618eae963a83723a0.tar.gz gcc-24bfafbc9d95153f4ad59d6618eae963a83723a0.tar.bz2 |
re PR target/31361 (SSE2 generation bug with shifts)
PR target/31361
* config/i386/i386.c (IX86_BUILTIN_PSLLDQ128, IX86_BUILTIN_PSLLW128,
IX86_BUILTIN_PSLLD128, IX86_BUILTIN_PSLLQ128, IX86_BUILTIN_PSRAW128,
IX86_BUILTIN_PSRAD128, IX86_BUILTIN_PSRLW128, IX86_BUILTIN_PSRLD128,
IX86_BUILTIN_PSRLQ128): New.
(ix86_init_mmx_sse_builtins): Add them.
(ix86_expand_builtin): Expand them.
* config/i386/sse.md (ashr<mode>3, lshr<mode>3, ashl<mode>3): Make
operand 2 be TImode.
* config/i386/emmintrin.h (_mm_slli_epi64, _mm_srai_epi16,
_mm_srai_epi32, _mm_srli_epi16, _mm_srli_epi32,
_mm_srli_epi64): Mark __B const.
(_mm_srli_si128, _mm_srli_si128): Fix disabled inline versions.
(_mm_sll_epi16, _mm_sll_epi32, _mm_sll_epi64, _mm_sra_epi16,
_mm_sra_epi32, _mm_srl_epi16, _mm_srl_epi32, _mm_srl_epi64): Use
new two-vector shift builtins.
From-SVN: r123250
-rw-r--r-- | gcc/ChangeLog | 19 |
-rw-r--r-- | gcc/config/i386/emmintrin.h | 32 |
-rw-r--r-- | gcc/config/i386/i386.c | 120 |
-rw-r--r-- | gcc/config/i386/sse.md | 6 |
4 files changed, 156 insertions, 21 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b863665..bba2586 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2007-03-26 Richard Henderson <rth@redhat.com> + + PR target/31361 + * config/i386/i386.c (IX86_BUILTIN_PSLLDQ128, IX86_BUILTIN_PSLLW128, + IX86_BUILTIN_PSLLD128, IX86_BUILTIN_PSLLQ128, IX86_BUILTIN_PSRAW128, + IX86_BUILTIN_PSRAD128, IX86_BUILTIN_PSRLW128, IX86_BUILTIN_PSRLD128, + IX86_BUILTIN_PSRLQ128): New. + (ix86_init_mmx_sse_builtins): Add them. + (ix86_expand_builtin): Expand them. + * config/i386/sse.md (ashr<mode>3, lshr<mode>3, ashl<mode>3): Make + operand 2 be TImode. + * config/i386/emmintrin.h (_mm_slli_epi64, _mm_srai_epi16, + _mm_srai_epi32, _mm_srli_epi16, _mm_srli_epi32, + _mm_srli_epi64): Mark __B const. + (_mm_srli_si128, _mm_srli_si128): Fix disabled inline versions. + (_mm_sll_epi16, _mm_sll_epi32, _mm_sll_epi64, _mm_sra_epi16, + _mm_sra_epi32, _mm_srl_epi16, _mm_srl_epi32, _mm_srl_epi64): Use + new two-vector shift builtins. + 2007-03-26 Ian Lance Taylor <iant@google.com> PR tree-optimization/31345 diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index 002e16c..a230a70 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -1117,19 +1117,19 @@ _mm_slli_epi32 (__m128i __A, int __B) } static __inline __m128i __attribute__((__always_inline__)) -_mm_slli_epi64 (__m128i __A, int __B) +_mm_slli_epi64 (__m128i __A, const int __B) { return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B); } static __inline __m128i __attribute__((__always_inline__)) -_mm_srai_epi16 (__m128i __A, int __B) +_mm_srai_epi16 (__m128i __A, const int __B) { return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B); } static __inline __m128i __attribute__((__always_inline__)) -_mm_srai_epi32 (__m128i __A, int __B) +_mm_srai_epi32 (__m128i __A, const int __B) { return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); } @@ -1138,13 +1138,13 @@ _mm_srai_epi32 (__m128i __A, int __B) static __m128i 
__attribute__((__always_inline__)) _mm_srli_si128 (__m128i __A, const int __B) { - return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B)) + return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B * 8)); } static __m128i __attribute__((__always_inline__)) _mm_srli_si128 (__m128i __A, const int __B) { - return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B)) + return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B * 8)); } #else #define _mm_srli_si128(__A, __B) \ @@ -1154,19 +1154,19 @@ _mm_srli_si128 (__m128i __A, const int __B) #endif static __inline __m128i __attribute__((__always_inline__)) -_mm_srli_epi16 (__m128i __A, int __B) +_mm_srli_epi16 (__m128i __A, const int __B) { return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B); } static __inline __m128i __attribute__((__always_inline__)) -_mm_srli_epi32 (__m128i __A, int __B) +_mm_srli_epi32 (__m128i __A, const int __B) { return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B); } static __inline __m128i __attribute__((__always_inline__)) -_mm_srli_epi64 (__m128i __A, int __B) +_mm_srli_epi64 (__m128i __A, const int __B) { return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B); } @@ -1174,49 +1174,49 @@ _mm_srli_epi64 (__m128i __A, int __B) static __inline __m128i __attribute__((__always_inline__)) _mm_sll_epi16 (__m128i __A, __m128i __B) { - return _mm_slli_epi16 (__A, _mm_cvtsi128_si32 (__B)); + return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B); } static __inline __m128i __attribute__((__always_inline__)) _mm_sll_epi32 (__m128i __A, __m128i __B) { - return _mm_slli_epi32 (__A, _mm_cvtsi128_si32 (__B)); + return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B); } static __inline __m128i __attribute__((__always_inline__)) _mm_sll_epi64 (__m128i __A, __m128i __B) { - return _mm_slli_epi64 (__A, _mm_cvtsi128_si32 (__B)); + return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B); } static __inline __m128i __attribute__((__always_inline__)) _mm_sra_epi16 (__m128i __A, 
__m128i __B) { - return _mm_srai_epi16 (__A, _mm_cvtsi128_si32 (__B)); + return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B); } static __inline __m128i __attribute__((__always_inline__)) _mm_sra_epi32 (__m128i __A, __m128i __B) { - return _mm_srai_epi32 (__A, _mm_cvtsi128_si32 (__B)); + return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B); } static __inline __m128i __attribute__((__always_inline__)) _mm_srl_epi16 (__m128i __A, __m128i __B) { - return _mm_srli_epi16 (__A, _mm_cvtsi128_si32 (__B)); + return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B); } static __inline __m128i __attribute__((__always_inline__)) _mm_srl_epi32 (__m128i __A, __m128i __B) { - return _mm_srli_epi32 (__A, _mm_cvtsi128_si32 (__B)); + return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B); } static __inline __m128i __attribute__((__always_inline__)) _mm_srl_epi64 (__m128i __A, __m128i __B) { - return _mm_srli_epi64 (__A, _mm_cvtsi128_si32 (__B)); + return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B); } static __inline __m128i __attribute__((__always_inline__)) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c5bd7ef..c5e08da 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -15937,6 +15937,16 @@ enum ix86_builtins IX86_BUILTIN_PSRLDI128, IX86_BUILTIN_PSRLQI128, + IX86_BUILTIN_PSLLDQ128, + IX86_BUILTIN_PSLLW128, + IX86_BUILTIN_PSLLD128, + IX86_BUILTIN_PSLLQ128, + IX86_BUILTIN_PSRAW128, + IX86_BUILTIN_PSRAD128, + IX86_BUILTIN_PSRLW128, + IX86_BUILTIN_PSRLD128, + IX86_BUILTIN_PSRLQ128, + IX86_BUILTIN_PUNPCKHBW128, IX86_BUILTIN_PUNPCKHWD128, IX86_BUILTIN_PUNPCKHDQ128, @@ -17055,14 +17065,22 @@ ix86_init_mmx_sse_builtins (void) def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", 
v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); + def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128); + def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128); + def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128); def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); + def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128); def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); @@ -17784,9 +17802,106 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, emit_insn (pat); return target; + case IX86_BUILTIN_PSLLWI128: + icode = CODE_FOR_ashlv8hi3; + goto do_pshifti; + case IX86_BUILTIN_PSLLDI128: + icode = CODE_FOR_ashlv4si3; + goto do_pshifti; + case IX86_BUILTIN_PSLLQI128: + icode = CODE_FOR_ashlv2di3; + goto do_pshifti; + case IX86_BUILTIN_PSRAWI128: + icode = CODE_FOR_ashrv8hi3; + goto do_pshifti; + case 
IX86_BUILTIN_PSRADI128: + icode = CODE_FOR_ashrv4si3; + goto do_pshifti; + case IX86_BUILTIN_PSRLWI128: + icode = CODE_FOR_lshrv8hi3; + goto do_pshifti; + case IX86_BUILTIN_PSRLDI128: + icode = CODE_FOR_lshrv4si3; + goto do_pshifti; + case IX86_BUILTIN_PSRLQI128: + icode = CODE_FOR_lshrv2di3; + goto do_pshifti; + do_pshifti: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + if (!CONST_INT_P (op1)) + { + error ("shift must be an immediate"); + return const0_rtx; + } + if (INTVAL (op1) < 0 || INTVAL (op1) > 255) + op1 = GEN_INT (255); + + tmode = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + op0 = copy_to_reg (op0); + + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (!pat) + return 0; + emit_insn (pat); + return target; + + case IX86_BUILTIN_PSLLW128: + icode = CODE_FOR_ashlv8hi3; + goto do_pshift; + case IX86_BUILTIN_PSLLD128: + icode = CODE_FOR_ashlv4si3; + goto do_pshift; + case IX86_BUILTIN_PSLLQ128: + icode = CODE_FOR_ashlv2di3; + goto do_pshift; + case IX86_BUILTIN_PSRAW128: + icode = CODE_FOR_ashrv8hi3; + goto do_pshift; + case IX86_BUILTIN_PSRAD128: + icode = CODE_FOR_ashrv4si3; + goto do_pshift; + case IX86_BUILTIN_PSRLW128: + icode = CODE_FOR_lshrv8hi3; + goto do_pshift; + case IX86_BUILTIN_PSRLD128: + icode = CODE_FOR_lshrv4si3; + goto do_pshift; + case IX86_BUILTIN_PSRLQ128: + icode = CODE_FOR_lshrv2di3; + goto do_pshift; + do_pshift: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + tmode = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + op0 = copy_to_reg (op0); + + op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0); + if (! 
(*insn_data[icode].operand[2].predicate) (op1, TImode)) + op1 = copy_to_reg (op1); + + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (!pat) + return 0; + emit_insn (pat); + return target; + case IX86_BUILTIN_PSLLDQI128: case IX86_BUILTIN_PSRLDQI128: - icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3 + icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3 : CODE_FOR_sse2_lshrti3); arg0 = CALL_EXPR_ARG (exp, 0); arg1 = CALL_EXPR_ARG (exp, 1); @@ -17807,7 +17922,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, return const0_rtx; } target = gen_reg_rtx (V2DImode); - pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1); + pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), + op0, op1); if (! pat) return 0; emit_insn (pat); diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index cc123cc..3d00df8 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3085,7 +3085,7 @@ [(set (match_operand:SSEMODE24 0 "register_operand" "=x") (ashiftrt:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xn")))] "TARGET_SSE2" "psra<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -3095,7 +3095,7 @@ [(set (match_operand:SSEMODE248 0 "register_operand" "=x") (lshiftrt:SSEMODE248 (match_operand:SSEMODE248 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xn")))] "TARGET_SSE2" "psrl<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -3105,7 +3105,7 @@ [(set (match_operand:SSEMODE248 0 "register_operand" "=x") (ashift:SSEMODE248 (match_operand:SSEMODE248 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xn")))] "TARGET_SSE2" "psll<ssevecsize>\t{%2, %0|%0, %2}" 
[(set_attr "type" "sseishft")