diff options
author | Uros Bizjak <uros@gcc.gnu.org> | 2011-08-26 21:42:30 +0200 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2011-08-26 21:42:30 +0200 |
commit | bbeb5bebd66e300ae907d133740580a7b4f33c5b (patch) | |
tree | 6ba15c2675c44333e41b6c6c577891ecd9576e46 /gcc | |
parent | 44782c0cba5373316f040b9c9074ded0c3e889f2 (diff) | |
download | gcc-bbeb5bebd66e300ae907d133740580a7b4f33c5b.zip gcc-bbeb5bebd66e300ae907d133740580a7b4f33c5b.tar.gz gcc-bbeb5bebd66e300ae907d133740580a7b4f33c5b.tar.bz2 |
sse.md (round<mode>2): New expander.
* config/i386/sse.md (round<mode>2): New expander.
* config/i386/i386.c (enum ix86_builtins): Add
IX86_BUILTIN_ROUND{PS,PD}_AZ{,256}.
(struct builtin_description): Add __builtin_ia32_round{ps,pd}_az{,256}
descriptions.
(ix86_builtin_vectorized_function): Handle BUILT_IN_ROUND{,F} builtins.
testsuite/ChangeLog:
* gcc.target/i386/sse4_1-round-vec.c: New test.
* gcc.target/i386/sse4_1-roundf-vec.c: New test.
* gcc.target/i386/avx-round-vec.c: New test.
* gcc.target/i386/avx-roundf-vec.c: New test.
From-SVN: r178123
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386.c | 40 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 34 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 15 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-round-vec.c | 54 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-roundf-vec.c | 54 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-round-vec.c | 54 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-roundf-vec.c | 54 |
7 files changed, 301 insertions, 4 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ce6fd80..698bc769 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -23661,10 +23661,12 @@ enum ix86_builtins IX86_BUILTIN_CEILPD, IX86_BUILTIN_TRUNCPD, IX86_BUILTIN_RINTPD, + IX86_BUILTIN_ROUNDPD_AZ, IX86_BUILTIN_FLOORPS, IX86_BUILTIN_CEILPS, IX86_BUILTIN_TRUNCPS, IX86_BUILTIN_RINTPS, + IX86_BUILTIN_ROUNDPS_AZ, IX86_BUILTIN_PTESTZ, IX86_BUILTIN_PTESTC, @@ -23837,10 +23839,12 @@ enum ix86_builtins IX86_BUILTIN_CEILPD256, IX86_BUILTIN_TRUNCPD256, IX86_BUILTIN_RINTPD256, + IX86_BUILTIN_ROUNDPD_AZ256, IX86_BUILTIN_FLOORPS256, IX86_BUILTIN_CEILPS256, IX86_BUILTIN_TRUNCPS256, IX86_BUILTIN_RINTPS256, + IX86_BUILTIN_ROUNDPS_AZ256, IX86_BUILTIN_UNPCKHPD256, IX86_BUILTIN_UNPCKLPD256, @@ -25063,11 +25067,15 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, 
"__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST }, @@ -25185,11 +25193,15 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, { 
OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, @@ -28146,6 +28158,34 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out, } break; + case BUILT_IN_ROUND: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == DFmode && in_mode == DFmode) + { + if (out_n == 2 && in_n == 2) + return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ]; + else if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256]; + } + break; + + case BUILT_IN_ROUNDF: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == SFmode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ]; + else if (out_n == 8 && in_n == 8) + return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256]; + } + break; + case BUILT_IN_FMA: if (out_mode == DFmode && in_mode == DFmode) { diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 566845b..e1c70eb 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -9646,6 +9646,40 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "<MODE>")]) +(define_expand "round<mode>2" + [(set (match_dup 4) + (plus:VF + (match_operand:VF 1 "nonimmediate_operand" "") + (match_dup 3))) + (set (match_operand:VF 0 "register_operand" "") + (unspec:VF + [(match_dup 4) (match_dup 5)] + UNSPEC_ROUND))] + "TARGET_ROUND && !flag_trapping_math" +{ + enum machine_mode scalar_mode; + const struct real_format *fmt; + REAL_VALUE_TYPE pred_half, half_minus_pred_half; + rtx half, vec_half; + + scalar_mode = GET_MODE_INNER (<MODE>mode); + + /* load nextafter (0.5, 0.0) */ + fmt = REAL_MODE_FORMAT (scalar_mode); + real_2expN (&half_minus_pred_half, 
-(fmt->p) - 1, scalar_mode); + REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half); + half = const_double_from_real_value (pred_half, scalar_mode); + + vec_half = ix86_build_const_vector (<MODE>mode, true, half); + vec_half = force_reg (<MODE>mode, vec_half); + + operands[3] = gen_reg_rtx (<MODE>mode); + emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1])); + + operands[4] = gen_reg_rtx (<MODE>mode); + operands[5] = GEN_INT (ROUND_TRUNC); +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Intel SSE4.2 string/text processing instructions diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 304a016..8974d2e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2011-08-26 Uros Bizjak <ubizjak@gmail.com> + + * gcc.target/i386/sse_4_1-round-vec.c: New test. + * gcc.target/i386/sse_4_1-roundf-vec.c: New test. + * gcc.target/i386/avx-round-vec.c: New test. + * gcc.target/i386/avx-roundf-vec.c: New test. + 2011-08-26 Jakub Jelinek <jakub@redhat.com> * gcc.target/i386/cmpxchg16b-1.c: Match also space after the @@ -12,10 +19,10 @@ 2011-08-26 Jiangning Liu <jiangning.liu@arm.com> - * gcc.target/arm/thumb2-cond-cmp-1.c: New. - * gcc.target/arm/thumb2-cond-cmp-2.c: Likewise. - * gcc.target/arm/thumb2-cond-cmp-3.c: Likewise. - * gcc.target/arm/thumb2-cond-cmp-4.c: Likewise. + * gcc.target/arm/thumb2-cond-cmp-1.c: New. + * gcc.target/arm/thumb2-cond-cmp-2.c: Likewise. + * gcc.target/arm/thumb2-cond-cmp-3.c: Likewise. + * gcc.target/arm/thumb2-cond-cmp-4.c: Likewise. 
2011-08-26 Andrew Stubbs <ams@codesourcery.com> diff --git a/gcc/testsuite/gcc.target/i386/avx-round-vec.c b/gcc/testsuite/gcc.target/i386/avx-round-vec.c new file mode 100644 index 0000000..d9514c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-round-vec.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "avx-check.h" + +#include <math.h> + +extern double floor (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +avx_test (void) +{ + double a[NUM]; + double r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = round (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != round (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-roundf-vec.c b/gcc/testsuite/gcc.target/i386/avx-roundf-vec.c new file mode 100644 index 0000000..ec4c166 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-roundf-vec.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "avx-check.h" + +#include <math.h> + +extern float roundf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 
0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +avx_test (void) +{ + float a[NUM]; + float r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = roundf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != roundf (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-round-vec.c b/gcc/testsuite/gcc.target/i386/sse4_1-round-vec.c new file mode 100644 index 0000000..dcd36cd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-round-vec.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "sse4_1-check.h" + +#include <math.h> + +extern double round (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +sse4_1_test (void) +{ + double a[NUM]; + double r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = round (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != round (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-roundf-vec.c b/gcc/testsuite/gcc.target/i386/sse4_1-roundf-vec.c new file mode 100644 index 0000000..d64660a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-roundf-vec.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } 
*/ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "sse4_1-check.h" + +#include <math.h> + +extern float roundf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +sse4_1_test (void) +{ + float a[NUM]; + float r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = roundf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != roundf (a[i])) + abort(); +} |