diff options
author | Uros Bizjak <uros@gcc.gnu.org> | 2011-11-14 20:36:33 +0100 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2011-11-14 20:36:33 +0100 |
commit | eab880cf621b82e4f38aed74b58d2bd76f89c46a (patch) | |
tree | 398a2f2896c360ca8b4a7aef191b1fe692beb2bf /gcc | |
parent | 2841f85e9c51a9ef0a81e23b57aedf76ad39621e (diff) | |
download | gcc-eab880cf621b82e4f38aed74b58d2bd76f89c46a.zip gcc-eab880cf621b82e4f38aed74b58d2bd76f89c46a.tar.gz gcc-eab880cf621b82e4f38aed74b58d2bd76f89c46a.tar.bz2 |
sse.md (round<mode>2_sfix): New expander.
* config/i386/sse.md (round<mode>2_sfix): New expander.
(round<mode>2_vec_pack_sfix): Ditto.
(<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>): Ditto.
(<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>): Ditto.
* config/i386/i386-builtin-types.def (V4SI_FTYPE_V4SF_ROUND,
V8SI_FTYPE_V8SF_ROUND, V4SI_FTYPE_V2DF_V2DF_ROUND,
V8SI_FTYPE_V4DF_V4DF_ROUND): New builtin types.
* config/i386/i386.c (ix86_builtins): Add
IX86_BUILTIN_{FLOORPD,CEILPD,ROUNDPD_AZ}_VEC_PACK_SFIX{,256} and
IX86_BUILTIN_{FLOORPS,CEILPS,ROUNDPS_AZ}_SFIX{,256} defines.
(bdesc_args): Add __builtin_ia32_{floorpd,ceilpd}_vec_pack_sfix{,256},
__builtin_ia32_roundpd_az_vec_pack_sfix{,256},
__builtin_ia32_{floorps,ceilps}_sfix{,256} and
__builtin_ia32_roundps_az_sfix{,256} descriptions.
(ix86_expand_sse_round_vec_pack_sfix): New.
(ix86_expand_args_builtin): Handle V4SI_FTYPE_V4SF_ROUND,
V8SI_FTYPE_V8SF_ROUND, V4SI_FTYPE_V2DF_V2DF_ROUND and
V8SI_FTYPE_V4DF_V4DF_ROUND types. Check last argument of
CODE_FOR_sse4_1_roundpd_vec_pack_sfix, CODE_FOR_sse4_1_roundps_sfix,
CODE_FOR_avx_roundpd_vec_pack_sfix256 and CODE_FOR_avx_roundps_sfix256.
(ix86_builtin_vectorized_function): Handle
BUILT_IN_{I,L,LL}FLOOR{,F}, BUILT_IN_{I,L,LL}CEIL{,F} and
BUILT_IN_{I,L,LL}ROUND{,F}.
testsuite/ChangeLog:
* gcc.target/i386/sse4_1-floor-sfix-vec.c: New test.
* gcc.target/i386/sse4_1-floorf-sfix-vec.c: Ditto.
* gcc.target/i386/avx-floor-sfix-vec.c: Ditto.
* gcc.target/i386/avx-floorf-sfix-vec.c: Ditto.
* gcc.target/i386/sse4_1-ceil-sfix-vec.c: Ditto.
* gcc.target/i386/sse4_1-ceilf-sfix-vec.c: Ditto.
* gcc.target/i386/avx-ceil-sfix-vec.c: Ditto.
* gcc.target/i386/avx-ceilf-sfix-vec.c: Ditto.
* gcc.target/i386/sse4_1-round-sfix-vec.c: Ditto.
* gcc.target/i386/sse4_1-roundf-sfix-vec.c: Ditto.
* gcc.target/i386/avx-round-sfix-vec.c: Ditto.
* gcc.target/i386/avx-roundf-sfix-vec.c: Ditto.
From-SVN: r181361
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 94 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin-types.def | 5 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 195 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 72 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 23 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-ceil-sfix-vec.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-ceilf-sfix-vec.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-floor-sfix-vec.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-floorf-sfix-vec.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-round-sfix-vec.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-roundf-sfix-vec.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-ceil-sfix-vec.c | 62 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-ceilf-sfix-vec.c | 62 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-floor-sfix-vec.c | 62 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-floorf-sfix-vec.c | 62 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-round-sfix-vec.c | 62 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-roundf-sfix-vec.c | 62 |
17 files changed, 769 insertions, 46 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 51ea394..fabb1da 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,29 @@ +2011-11-14 Uros Bizjak <ubizjak@gmail.com> + + * config/i386/sse.md (round<mode>2_sfix): New expander. + (round<mode>2_vec_pack_sfix): Ditto. + (<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>): Ditto. + (<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>): Ditto. + * config/i386/builtin-types.def (V4SI_FTYPE_V4SF_ROUND, + V8SI_FTYPE_V8SF_ROUND, V4SI_FTYPE_V2DF_V2DF_ROUND, + V8SI_FTYPE_V4DF_V4DF_ROUND): New builtin types. + * config/i386/i386.c (ix86_builtins): Add + IX86_BUILTIN_{FLOORPD,CEILPD,ROUNDPD_AZ}_VEC_PACK_SFIX{,256} and + IX86_BUILTIN_{FLOORPS,CEILPS,ROUNDPS_AZ}_SFIX{,256} defines. + (bdesc_args): Add __builtin_ia32_{floorpd,ceilpd}_vec_pack_sfix{,256}, + __builtin_ia32_roundpd_az_vec_pack_sfix{,256}, + __builtin_ia32_{floorps,ceilps}_sfix{,256}and + __builtin_ia32_roundps_az_sfix{,256} descriptions. + (ix86_expand_sse_round_vec_pack_sfix): New. + (ix86_expand_args_builtin): Handle V4SI_FTYPE_V4SF_ROUND, + V8SI_FTYPE_V8SF_ROUND, V4SI_FTYPE_V2DF_V2DF_ROUND and + V8SI_FTYPE_V4DF_V4DF_ROUND types. Check last argument of + CODE_FOR_sse4_1_roundpd_vec_pack_sfix, CODE_FOR_sse4_1_roundps_sfix, + CODE_FOR_avx_roundpd_vec_pack_sfix256 and CODE_FOR_avx_roundps_sfix256. + (ix86_builtin_vectorized_function): Handle + BUILT_IN_{I,L,LL}FLOOR{,F}, BUILT_IN_{I,L,LL}CEIL{,F} and + BUILT_IN_{I,L,LL}ROUND{,F} + 2011-11-14 Jan Hubicka <jh@suse.cz> PR middle-end/50598 @@ -11,38 +37,38 @@ 2011-11-14 Zolotukhin Michael <michael.v.zolotukhin@gmail.com> Jan Hubicka <jh@suse.cz> - * config/i386/i386.h (processor_costs): Add second dimension to - stringop_algs array. - * config/i386/i386.c (cost models): Initialize second dimension of - stringop_algs arrays. + * config/i386/i386.h (processor_costs): Add second dimension to + stringop_algs array. + * config/i386/i386.c (cost models): Initialize second dimension of + stringop_algs arrays. 
(core_cost): New costs based on generic64 costs with updated stringop values. - (promote_duplicated_reg): Add support for vector modes, add - declaration. - (promote_duplicated_reg_to_size): Likewise. + (promote_duplicated_reg): Add support for vector modes, add + declaration. + (promote_duplicated_reg_to_size): Likewise. (processor_target): Set core costs for core variants. - (expand_set_or_movmem_via_loop_with_iter): New function. - (expand_set_or_movmem_via_loop): Enable reuse of the same iters in - different loops, produced by this function. - (emit_strset): New function. - (expand_movmem_epilogue): Add epilogue generation for bigger sizes, - use SSE-moves where possible. - (expand_setmem_epilogue): Likewise. - (expand_movmem_prologue): Likewise for prologue. - (expand_setmem_prologue): Likewise. - (expand_constant_movmem_prologue): Likewise. - (expand_constant_setmem_prologue): Likewise. - (decide_alg): Add new argument align_unknown. Fix algorithm of - strategy selection if TARGET_INLINE_ALL_STRINGOPS is set; Skip sse_loop - (decide_alignment): Update desired alignment according to chosen move - mode. - (ix86_expand_movmem): Change unrolled_loop strategy to use SSE-moves. - (ix86_expand_setmem): Likewise. - (ix86_slow_unaligned_access): Implementation of new hook - slow_unaligned_access. - * config/i386/i386.md (strset): Enable half-SSE moves. - * config/i386/sse.md (vec_dupv4si): Add expand for vec_dupv4si. - (vec_dupv2di): Add expand for vec_dupv2di. + (expand_set_or_movmem_via_loop_with_iter): New function. + (expand_set_or_movmem_via_loop): Enable reuse of the same iters in + different loops, produced by this function. + (emit_strset): New function. + (expand_movmem_epilogue): Add epilogue generation for bigger sizes, + use SSE-moves where possible. + (expand_setmem_epilogue): Likewise. + (expand_movmem_prologue): Likewise for prologue. + (expand_setmem_prologue): Likewise. + (expand_constant_movmem_prologue): Likewise. 
+ (expand_constant_setmem_prologue): Likewise. + (decide_alg): Add new argument align_unknown. Fix algorithm of + strategy selection if TARGET_INLINE_ALL_STRINGOPS is set; Skip sse_loop + (decide_alignment): Update desired alignment according to chosen move + mode. + (ix86_expand_movmem): Change unrolled_loop strategy to use SSE-moves. + (ix86_expand_setmem): Likewise. + (ix86_slow_unaligned_access): Implementation of new hook + slow_unaligned_access. + * config/i386/i386.md (strset): Enable half-SSE moves. + * config/i386/sse.md (vec_dupv4si): Add expand for vec_dupv4si. + (vec_dupv2di): Add expand for vec_dupv2di. 2011-11-14 Dimitrios Apostolou <jimis@gmx.net> @@ -53,8 +79,7 @@ 2011-11-14 Kai Tietz <ktietz@redhat.com> - * gcov.c (generate_results): Add missing semicolon and - correct indent. + * gcov.c (generate_results): Add missing semicolon and correct indent. 2011-11-14 Ira Rosen <ira.rosen@linaro.org> @@ -71,9 +96,8 @@ PR target/50694 * config/sh/sh.h (IS_LITTLE_ENDIAN_OPTION, UNSUPPORTED_SH2A): New macros. - (DRIVER_SELF_SPECS): Use new macros to filter out - unsupported options taking the default configuration into - account. + (DRIVER_SELF_SPECS): Use new macros to filter out unsupported options + taking the default configuration into account. 2011-11-13 Jonathan Wakely <jwakely.gcc@gmail.com> @@ -110,7 +134,7 @@ 2011-11-12 Richard Henderson <rth@redhat.com> - * config/rs6000/rs6000.md (fix_trunc<SFDF>si2_stfiwx): Use + * config/rs6000/rs6000.md (fix_trunc<SFDF>si2_stfiwx): Use nonimmediate_operand for the destination. (fixuns_trunc<SFDF>si2_stfiwx): Likewise. 
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 5dcb68c..79fb1427 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -465,6 +465,11 @@ DEF_FUNCTION_TYPE_ALIAS (V4DF_FTYPE_V4DF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V8SF_FTYPE_V8SF, ROUND) +DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V2DF_V2DF, ROUND) +DEF_FUNCTION_TYPE_ALIAS (V8SI_FTYPE_V4DF_V4DF, ROUND) +DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SF, ROUND) +DEF_FUNCTION_TYPE_ALIAS (V8SI_FTYPE_V8SF, ROUND) + DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V2DF_V2DF, PTEST) DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V2DI_V2DI, PTEST) DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V4DF_V4DF, PTEST) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a8e8a6a..e307999 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -25457,22 +25457,32 @@ enum ix86_builtins IX86_BUILTIN_PMULDQ128, IX86_BUILTIN_PMULLD128, - IX86_BUILTIN_ROUNDPD, - IX86_BUILTIN_ROUNDPS, IX86_BUILTIN_ROUNDSD, IX86_BUILTIN_ROUNDSS, + IX86_BUILTIN_ROUNDPD, + IX86_BUILTIN_ROUNDPS, + IX86_BUILTIN_FLOORPD, IX86_BUILTIN_CEILPD, IX86_BUILTIN_TRUNCPD, IX86_BUILTIN_RINTPD, IX86_BUILTIN_ROUNDPD_AZ, + + IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, + IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, + IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, + IX86_BUILTIN_FLOORPS, IX86_BUILTIN_CEILPS, IX86_BUILTIN_TRUNCPS, IX86_BUILTIN_RINTPS, IX86_BUILTIN_ROUNDPS_AZ, + IX86_BUILTIN_FLOORPS_SFIX, + IX86_BUILTIN_CEILPS_SFIX, + IX86_BUILTIN_ROUNDPS_AZ_SFIX, + IX86_BUILTIN_PTESTZ, IX86_BUILTIN_PTESTC, IX86_BUILTIN_PTESTNZC, @@ -25646,12 +25656,21 @@ enum ix86_builtins IX86_BUILTIN_TRUNCPD256, IX86_BUILTIN_RINTPD256, IX86_BUILTIN_ROUNDPD_AZ256, + + IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, + IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, + IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, + IX86_BUILTIN_FLOORPS256, IX86_BUILTIN_CEILPS256, IX86_BUILTIN_TRUNCPS256, IX86_BUILTIN_RINTPS256, 
IX86_BUILTIN_ROUNDPS_AZ256, + IX86_BUILTIN_FLOORPS_SFIX256, + IX86_BUILTIN_CEILPS_SFIX256, + IX86_BUILTIN_ROUNDPS_AZ_SFIX256, + IX86_BUILTIN_UNPCKHPD256, IX86_BUILTIN_UNPCKLPD256, IX86_BUILTIN_UNPCKHPS256, @@ -26856,14 +26875,22 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND }, + { OPTION_MASK_ISA_ROUND, 
CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST }, @@ -26983,13 +27010,21 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND }, { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, 
(int) V8SF_FTYPE_V8SF_ROUND }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, @@ -28430,7 +28465,7 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree exp, return SUBREG_REG (target); } -/* Subroutine of ix86_expand_args_builtin to take care of round insns. */ +/* Subroutines of ix86_expand_args_builtin to take care of round insns. 
*/ static rtx ix86_expand_sse_round (const struct builtin_description *d, tree exp, @@ -28463,6 +28498,44 @@ ix86_expand_sse_round (const struct builtin_description *d, tree exp, return target; } +static rtx +ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d, + tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2; + enum machine_mode tmode = insn_data[d->icode].operand[0].mode; + enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; + enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; + + if (optimize || target == 0 + || GET_MODE (target) != tmode + || !insn_data[d->icode].operand[0].predicate (target, tmode)) + target = gen_reg_rtx (tmode); + + op0 = safe_vector_operand (op0, mode0); + op1 = safe_vector_operand (op1, mode1); + + if ((optimize && !register_operand (op0, mode0)) + || !insn_data[d->icode].operand[0].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if ((optimize && !register_operand (op1, mode1)) + || !insn_data[d->icode].operand[1].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + op2 = GEN_INT (d->comparison); + + pat = GEN_FCN (d->icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + /* Subroutine of ix86_expand_builtin to take care of ptest insns. 
*/ static rtx @@ -28736,7 +28809,12 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V4DF_FTYPE_V4DF_ROUND: case V4SF_FTYPE_V4SF_ROUND: case V8SF_FTYPE_V8SF_ROUND: + case V4SI_FTYPE_V4SF_ROUND: + case V8SI_FTYPE_V8SF_ROUND: return ix86_expand_sse_round (d, exp, target); + case V4SI_FTYPE_V2DF_V2DF_ROUND: + case V8SI_FTYPE_V4DF_V4DF_ROUND: + return ix86_expand_sse_round_vec_pack_sfix (d, exp, target); case INT_FTYPE_V8SF_V8SF_PTEST: case INT_FTYPE_V4DI_V4DI_PTEST: case INT_FTYPE_V4DF_V4DF_PTEST: @@ -29053,15 +29131,22 @@ ix86_expand_args_builtin (const struct builtin_description *d, error ("the last argument must be an 1-bit immediate"); return const0_rtx; - case CODE_FOR_sse4_1_roundpd: - case CODE_FOR_sse4_1_roundps: case CODE_FOR_sse4_1_roundsd: case CODE_FOR_sse4_1_roundss: + + case CODE_FOR_sse4_1_roundpd: + case CODE_FOR_sse4_1_roundps: + case CODE_FOR_avx_roundpd256: + case CODE_FOR_avx_roundps256: + + case CODE_FOR_sse4_1_roundpd_vec_pack_sfix: + case CODE_FOR_sse4_1_roundps_sfix: + case CODE_FOR_avx_roundpd_vec_pack_sfix256: + case CODE_FOR_avx_roundps_sfix256: + case CODE_FOR_sse4_1_blendps: case CODE_FOR_avx_blendpd256: case CODE_FOR_avx_vpermilv4df: - case CODE_FOR_avx_roundpd256: - case CODE_FOR_avx_roundps256: error ("the last argument must be a 4-bit immediate"); return const0_rtx; @@ -30087,6 +30172,70 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out, } break; + case BUILT_IN_IFLOOR: + case BUILT_IN_LFLOOR: + case BUILT_IN_LLFLOOR: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == SImode && in_mode == DFmode) + { + if (out_n == 4 && in_n == 2) + return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX]; + else if (out_n == 8 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256]; + } + break; + + case BUILT_IN_IFLOORF: + case BUILT_IN_LFLOORF: + case BUILT_IN_LLFLOORF: + /* The round insn does not trap on denormals. 
*/ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == SImode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX]; + else if (out_n == 8 && in_n == 8) + return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256]; + } + break; + + case BUILT_IN_ICEIL: + case BUILT_IN_LCEIL: + case BUILT_IN_LLCEIL: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == SImode && in_mode == DFmode) + { + if (out_n == 4 && in_n == 2) + return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX]; + else if (out_n == 8 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256]; + } + break; + + case BUILT_IN_ICEILF: + case BUILT_IN_LCEILF: + case BUILT_IN_LLCEILF: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == SImode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX]; + else if (out_n == 8 && in_n == 8) + return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256]; + } + break; + case BUILT_IN_IRINT: case BUILT_IN_LRINT: case BUILT_IN_LLRINT: @@ -30111,6 +30260,38 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out, } break; + case BUILT_IN_IROUND: + case BUILT_IN_LROUND: + case BUILT_IN_LLROUND: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == SImode && in_mode == DFmode) + { + if (out_n == 4 && in_n == 2) + return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX]; + else if (out_n == 8 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256]; + } + break; + + case BUILT_IN_IROUNDF: + case BUILT_IN_LROUNDF: + case BUILT_IN_LLROUNDF: + /* The round insn does not trap on denormals. 
*/ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == SImode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX]; + else if (out_n == 8 && in_n == 8) + return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256]; + } + break; + case BUILT_IN_COPYSIGN: if (out_mode == DFmode && in_mode == DFmode) { diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4938abf..147646b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -9902,6 +9902,45 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>")]) +(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>" + [(match_operand:<sseintvecmode> 0 "register_operand" "") + (match_operand:VF1 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_0_to_15_operand" "")] + "TARGET_ROUND" +{ + rtx tmp = gen_reg_rtx (<MODE>mode); + + emit_insn + (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1], + operands[2])); + emit_insn + (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp)); + DONE; +}) + +(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>" + [(match_operand:<ssepackfltmode> 0 "register_operand" "") + (match_operand:VF2 1 "nonimmediate_operand" "") + (match_operand:VF2 2 "nonimmediate_operand" "") + (match_operand:SI 3 "const_0_to_15_operand" "")] + "TARGET_ROUND" +{ + rtx tmp0, tmp1; + + tmp0 = gen_reg_rtx (<MODE>mode); + tmp1 = gen_reg_rtx (<MODE>mode); + + emit_insn + (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1], + operands[3])); + emit_insn + (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2], + operands[3])); + emit_insn + (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1)); + DONE; +}) + (define_insn "sse4_1_round<ssescalarmodesuffix>" [(set (match_operand:VF_128 0 "register_operand" "=x,x") (vec_merge:VF_128 @@ -9957,6 +9996,39 @@ operands[5] = GEN_INT (ROUND_TRUNC); }) +(define_expand "round<mode>2_sfix" + 
[(match_operand:<sseintvecmode> 0 "register_operand" "") + (match_operand:VF1 1 "nonimmediate_operand" "")] + "TARGET_ROUND && !flag_trapping_math" +{ + rtx tmp = gen_reg_rtx (<MODE>mode); + + emit_insn (gen_round<mode>2 (tmp, operands[1])); + + emit_insn + (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp)); + DONE; +}) + +(define_expand "round<mode>2_vec_pack_sfix" + [(match_operand:<ssepackfltmode> 0 "register_operand" "") + (match_operand:VF2 1 "nonimmediate_operand" "") + (match_operand:VF2 2 "nonimmediate_operand" "")] + "TARGET_ROUND && !flag_trapping_math" +{ + rtx tmp0, tmp1; + + tmp0 = gen_reg_rtx (<MODE>mode); + tmp1 = gen_reg_rtx (<MODE>mode); + + emit_insn (gen_round<mode>2 (tmp0, operands[1])); + emit_insn (gen_round<mode>2 (tmp1, operands[2])); + + emit_insn + (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1)); + DONE; +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Intel SSE4.2 string/text processing instructions diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7a7445e..e371383 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,18 @@ +2011-11-14 Uros Bizjak <ubizjak@gmail.com> + + * gcc.target/i386/sse4_1-floor-sfix-vec.c: New test. + * gcc.target/i386/sse4_1-floorf-sfix-vec.c: Ditto. + * gcc.target/i386/avx-floor-sfix-vec.c: Ditto. + * gcc.target/i386/avx-floorf-sfix-vec.c: Ditto. + * gcc.target/i386/sse4_1-ceil-sfix-vec.c: Ditto. + * gcc.target/i386/sse4_1-ceilf-sfix-vec.c: Ditto. + * gcc.target/i386/avx-ceil-sfix-vec.c: Ditto. + * gcc.target/i386/avx-ceilf-sfix-vec.c: Ditto. + * gcc.target/i386/sse4_1-round-sfix-vec.c: Ditto. + * gcc.target/i386/sse4_1-roundf-sfix-vec.c: Ditto. + * gcc.target/i386/avx-round-sfix-vec.c: Ditto. + * gcc.target/i386/avx-roundf-sfix-vec.c: Ditto. 
+ 2011-11-14 Fabien Chêne <fabien@gcc.gnu.org> PR c++/6936 @@ -309,8 +324,8 @@ 2011-11-09 Janne Blomqvist <jb@gcc.gnu.org> - PR libfortran/50016 - * gfortran.dg/inquire_size.f90: Don't flush the unit. + PR libfortran/50016 + * gfortran.dg/inquire_size.f90: Don't flush the unit. 2011-11-09 Richard Guenther <rguenther@suse.de> @@ -495,8 +510,8 @@ 2011-11-07 Janne Blomqvist <jb@gcc.gnu.org> - PR libfortran/45723 - * gfortran.dg/open_dev_null.f90: Remove testcase. + PR libfortran/45723 + * gfortran.dg/open_dev_null.f90: Remove testcase. 2011-11-07 Uros Bizjak <ubizjak@gmail.com> diff --git a/gcc/testsuite/gcc.target/i386/avx-ceil-sfix-vec.c b/gcc/testsuite/gcc.target/i386/avx-ceil-sfix-vec.c new file mode 100644 index 0000000..ac0911f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ceil-sfix-vec.c @@ -0,0 +1,9 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse4_1-ceil-sfix-vec.c" diff --git a/gcc/testsuite/gcc.target/i386/avx-ceilf-sfix-vec.c b/gcc/testsuite/gcc.target/i386/avx-ceilf-sfix-vec.c new file mode 100644 index 0000000..789b78e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ceilf-sfix-vec.c @@ -0,0 +1,9 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse4_1-ceilf-sfix-vec.c" diff --git a/gcc/testsuite/gcc.target/i386/avx-floor-sfix-vec.c b/gcc/testsuite/gcc.target/i386/avx-floor-sfix-vec.c new file mode 100644 index 0000000..efa557c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-floor-sfix-vec.c @@ -0,0 +1,9 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* {
dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse4_1-floor-sfix-vec.c" diff --git a/gcc/testsuite/gcc.target/i386/avx-floorf-sfix-vec.c b/gcc/testsuite/gcc.target/i386/avx-floorf-sfix-vec.c new file mode 100644 index 0000000..0c1587a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-floorf-sfix-vec.c @@ -0,0 +1,9 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse4_1-floorf-sfix-vec.c" diff --git a/gcc/testsuite/gcc.target/i386/avx-round-sfix-vec.c b/gcc/testsuite/gcc.target/i386/avx-round-sfix-vec.c new file mode 100644 index 0000000..5adfffa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-round-sfix-vec.c @@ -0,0 +1,9 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse4_1-round-sfix-vec.c" diff --git a/gcc/testsuite/gcc.target/i386/avx-roundf-sfix-vec.c b/gcc/testsuite/gcc.target/i386/avx-roundf-sfix-vec.c new file mode 100644 index 0000000..1fd4591 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-roundf-sfix-vec.c @@ -0,0 +1,9 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse4_1-roundf-sfix-vec.c" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-ceil-sfix-vec.c b/gcc/testsuite/gcc.target/i386/sse4_1-ceil-sfix-vec.c new file mode 100644 index 0000000..ca07d9c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-ceil-sfix-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options 
"-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <math.h> + +extern double ceil (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + double a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) ceil (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) ceil (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-ceilf-sfix-vec.c b/gcc/testsuite/gcc.target/i386/sse4_1-ceilf-sfix-vec.c new file mode 100644 index 0000000..b0559bf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-ceilf-sfix-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <math.h> + +extern float ceilf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i 
% 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + float a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) ceilf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) ceilf (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-floor-sfix-vec.c b/gcc/testsuite/gcc.target/i386/sse4_1-floor-sfix-vec.c new file mode 100644 index 0000000..2083a60 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-floor-sfix-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <math.h> + +extern double floor (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + double a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) floor (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) floor (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-floorf-sfix-vec.c b/gcc/testsuite/gcc.target/i386/sse4_1-floorf-sfix-vec.c new file mode 100644 index 0000000..aa2976d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-floorf-sfix-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { 
dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <math.h> + +extern float floorf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + float a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) floorf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) floorf (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-round-sfix-vec.c b/gcc/testsuite/gcc.target/i386/sse4_1-round-sfix-vec.c new file mode 100644 index 0000000..9abbe55 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-round-sfix-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <math.h> + +extern double round (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand 
(); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + double a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) round (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) round (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundf-sfix-vec.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundf-sfix-vec.c new file mode 100644 index 0000000..5384e5c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundf-sfix-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <math.h> + +extern float roundf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + float a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) roundf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) roundf (a[i])) + abort(); +} |