diff options
author | Uros Bizjak <uros@gcc.gnu.org> | 2011-11-15 19:45:20 +0100 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2011-11-15 19:45:20 +0100 |
commit | 76a2c904dff6263556807f1a3e39521bea73f76d (patch) | |
tree | bc667d2621e3aa7fddbda489e9c417b2eedc661a /gcc | |
parent | 6bbb00938cd2301cd89a67c57be6ecacf79f66aa (diff) | |
download | gcc-76a2c904dff6263556807f1a3e39521bea73f76d.zip gcc-76a2c904dff6263556807f1a3e39521bea73f76d.tar.gz gcc-76a2c904dff6263556807f1a3e39521bea73f76d.tar.bz2 |
sse.md (vec_pack_trunc_v2df): Optimize sequence for AVX.
* config/i386/sse.md (vec_pack_trunc_v2df): Optimize sequence for AVX.
(vec_pack_sfix_trunc_v2df): Ditto.
(vec_pack_sfix_v2df): Ditto.
(vec_pack_sfix_trunc_v4df): Generate fix_truncv4dfv4si2 and
avx_vec_concatv8si patterns.
(vec_pack_sfix_v4df): Generate avx_cvtpd2dq256 and
avx_vec_concatv8si patterns.
testsuite/ChangeLog:
* gcc.target/i386/avx-floor-sfix-2-vec.c: New test.
* gcc.target/i386/avx-ceil-sfix-2-vec.c: Ditto.
* gcc.target/i386/avx-rint-sfix-2-vec.c: Ditto.
* gcc.target/i386/avx-round-sfix-2-vec.c: Ditto.
From-SVN: r181387
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 20 |
-rw-r--r-- | gcc/config/i386/sse.md | 99 |
-rw-r--r-- | gcc/testsuite/ChangeLog | 15 |
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-ceil-sfix-2-vec.c | 62 |
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-floor-sfix-2-vec.c | 62 |
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-rint-sfix-2-vec.c | 62 |
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-rint-sfix-vec.c | 9 |
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-rintf-sfix-vec.c | 9 |
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-round-sfix-2-vec.c | 62 |
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-rint-sfix-vec.c | 62 |
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-rintf-sfix-vec.c | 62 |
11 files changed, 486 insertions, 38 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ea952bd..c3abba4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2011-11-15 Uros Bizjak <ubizjak@gmail.com> + + * config/i386/sse.md (vec_pack_trunc_v2df): Optimize sequence for AVX. + (vec_pack_sfix_trunc_v2df): Ditto. + (vec_pack_sfix_v2df): Ditto. + (vec_pack_sfix_trunc_v4df): Generate fix_truncv4dfv4si2 and + avx_vec_concatv8si patterns. + (vec_pack_sfix_v4df): Generate avx_cvtpd2dq256 and + avx_vec_concatv8si patterns. + 2011-11-15 Torvald Riegel <triegel@redhat.com> * c-parser.c (c_parser_transaction_expression): Require parentheses @@ -35,7 +45,7 @@ (avr_register_target_pragmas): New function. Register address space __pgm. (avr_cpu_cpp_builtins): Add built-in define __PGM. - + * config/avr/avr.c: Include "c-family/c-common.h". (TARGET_LEGITIMATE_ADDRESS_P): Remove define. (TARGET_LEGITIMIZE_ADDRESS): Remove define. @@ -71,7 +81,7 @@ cause (progmem or address space) when code wants to write to flash. (avr_section_type_flags): Unset section flag SECTION_BSS for data in progmem. - + * config/avr/predicates.md (nop_general_operand): New predicate. (nox_general_operand): New predicate. * config/avr/avr.md (LPM_REGNO): New define_constant. @@ -82,7 +92,8 @@ created MEM. (movqi_insn, *movhi, *movpsi, *movsi, *movsf): Change predicate #1 to nox_general_operand. - (ashrqi3, ashrhi3, ashrsi3): Change predicate #1 to nop_general_operand. + (ashrqi3, ashrhi3, ashrsi3): Change predicate #1 to + nop_general_operand. (ashlqi3, *ashlqi3, ashlhi3, ashlsi3): Ditto. (lshrqi3, *lshrqi3, lshrhi3, lshrsi3): Ditto. (split-lpmx): New split. @@ -137,7 +148,8 @@ 2011-11-14 Richard Henderson <rth@redhat.com> - * config/rs6000/rs6000.c (emit_load_locked): Assert the mode is handled. + * config/rs6000/rs6000.c (emit_load_locked): Assert the mode is + handled. (emit_store_conditional): Likewise. (rs6000_pre_atomic_barrier, rs6000_post_atomic_barrier): New. (rs6000_adjust_atomic_subword): New. 
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b476752..b8e821d 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3038,14 +3038,25 @@ (match_operand:V2DF 2 "nonimmediate_operand" "")] "TARGET_SSE2" { - rtx r1, r2; + rtx tmp0, tmp1; + + if (TARGET_AVX && !TARGET_PREFER_AVX128) + { + tmp0 = gen_reg_rtx (V4DFmode); + tmp1 = force_reg (V2DFmode, operands[1]); - r1 = gen_reg_rtx (V4SFmode); - r2 = gen_reg_rtx (V4SFmode); + emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); + emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0)); + } + else + { + tmp0 = gen_reg_rtx (V4SFmode); + tmp1 = gen_reg_rtx (V4SFmode); - emit_insn (gen_sse2_cvtpd2ps (r1, operands[1])); - emit_insn (gen_sse2_cvtpd2ps (r2, operands[2])); - emit_insn (gen_sse_movlhps (operands[0], r1, r2)); + emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1])); + emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2])); + emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1)); + } DONE; }) @@ -3057,12 +3068,12 @@ { rtx r1, r2; - r1 = gen_reg_rtx (V8SImode); - r2 = gen_reg_rtx (V8SImode); + r1 = gen_reg_rtx (V4SImode); + r2 = gen_reg_rtx (V4SImode); - emit_insn (gen_avx_cvttpd2dq256_2 (r1, operands[1])); - emit_insn (gen_avx_cvttpd2dq256_2 (r2, operands[2])); - emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20))); + emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1])); + emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2])); + emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2)); DONE; }) @@ -3072,16 +3083,28 @@ (match_operand:V2DF 2 "nonimmediate_operand" "")] "TARGET_SSE2" { - rtx r1, r2; + rtx tmp0, tmp1; - r1 = gen_reg_rtx (V4SImode); - r2 = gen_reg_rtx (V4SImode); + if (TARGET_AVX && !TARGET_PREFER_AVX128) + { + tmp0 = gen_reg_rtx (V4DFmode); + tmp1 = force_reg (V2DFmode, operands[1]); - emit_insn (gen_sse2_cvttpd2dq (r1, operands[1])); - emit_insn (gen_sse2_cvttpd2dq (r2, operands[2])); - emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, 
operands[0]), - gen_lowpart (V2DImode, r1), - gen_lowpart (V2DImode, r2))); + emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); + emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0)); + } + else + { + tmp0 = gen_reg_rtx (V4SImode); + tmp1 = gen_reg_rtx (V4SImode); + + emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1])); + emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2])); + emit_insn + (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), + gen_lowpart (V2DImode, tmp0), + gen_lowpart (V2DImode, tmp1))); + } DONE; }) @@ -3126,12 +3149,12 @@ { rtx r1, r2; - r1 = gen_reg_rtx (V8SImode); - r2 = gen_reg_rtx (V8SImode); + r1 = gen_reg_rtx (V4SImode); + r2 = gen_reg_rtx (V4SImode); - emit_insn (gen_avx_cvtpd2dq256_2 (r1, operands[1])); - emit_insn (gen_avx_cvtpd2dq256_2 (r2, operands[2])); - emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20))); + emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1])); + emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2])); + emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2)); DONE; }) @@ -3141,16 +3164,28 @@ (match_operand:V2DF 2 "nonimmediate_operand" "")] "TARGET_SSE2" { - rtx r1, r2; + rtx tmp0, tmp1; - r1 = gen_reg_rtx (V4SImode); - r2 = gen_reg_rtx (V4SImode); + if (TARGET_AVX && !TARGET_PREFER_AVX128) + { + tmp0 = gen_reg_rtx (V4DFmode); + tmp1 = force_reg (V2DFmode, operands[1]); - emit_insn (gen_sse2_cvtpd2dq (r1, operands[1])); - emit_insn (gen_sse2_cvtpd2dq (r2, operands[2])); - emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), - gen_lowpart (V2DImode, r1), - gen_lowpart (V2DImode, r2))); + emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); + emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0)); + } + else + { + tmp0 = gen_reg_rtx (V4SImode); + tmp1 = gen_reg_rtx (V4SImode); + + emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1])); + emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2])); + emit_insn + (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, 
operands[0]), + gen_lowpart (V2DImode, tmp0), + gen_lowpart (V2DImode, tmp1))); + } DONE; }) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c7a33b1..876d918 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2011-11-15 Uros Bizjak <ubizjak@gmail.com> + + * gcc.target/i386/avx-floor-sfix-2-vec.c: New test. + * gcc.target/i386/avx-ceil-sfix-2-vec.c: Ditto. + * gcc.target/i386/avx-rint-sfix-2-vec.c: Ditto. + * gcc.target/i386/avx-round-sfix-2-vec.c: Ditto. + 2011-11-15 Iain Sandoe <iains@gcc.gnu.org> * lib/gcc-simulate-thread.exp (simulate-thread): Do not run on @@ -16,10 +23,10 @@ * gcc.target/i386/avx-recip-vec.c: New test. * gcc.target/i386/avx-lrintf-vec.c: Ditto. * gcc.target/i386/avx-lrint-vec.c: Ditto. - * gcc.target/i386/avx-ceilf-vec.c: Include sse4_1-ceilf-vec.c. - * gcc.target/i386/avx-ceil-vec.c: Include sse4_1-ceil-vec.c. * gcc.target/i386/avx-floorf-vec.c: Include sse4_1-floorf-vec.c. * gcc.target/i386/avx-floor-vec.c: Include sse4_1-floor-vec.c. + * gcc.target/i386/avx-ceilf-vec.c: Include sse4_1-ceilf-vec.c. + * gcc.target/i386/avx-ceil-vec.c: Include sse4_1-ceil-vec.c. * gcc.target/i386/avx-rintf-vec.c: Include sse4_1-rintf-vec.c. * gcc.target/i386/avx-rint-vec.c: Include sse4_1-rint-vec.c. * gcc.target/i386/avx-roundf-vec.c: Include sse4_1-roundf-vec.c. @@ -45,6 +52,10 @@ * gcc.target/i386/sse4_1-ceilf-sfix-vec.c: Ditto. * gcc.target/i386/avx-ceil-sfix-vec.c: Ditto. * gcc.target/i386/avx-ceilf-sfix-vec.c: Ditto. + * gcc.target/i386/sse4_1-rint-sfix-vec.c: Ditto. + * gcc.target/i386/sse4_1-rintf-sfix-vec.c: Ditto. + * gcc.target/i386/avx-rint-sfix-vec.c: Ditto. + * gcc.target/i386/avx-rintf-sfix-vec.c: Ditto. * gcc.target/i386/sse4_1-round-sfix-vec.c: Ditto. * gcc.target/i386/sse4_1-roundf-sfix-vec.c: Ditto. * gcc.target/i386/avx-round-sfix-vec.c: Ditto. 
diff --git a/gcc/testsuite/gcc.target/i386/avx-ceil-sfix-2-vec.c b/gcc/testsuite/gcc.target/i386/avx-ceil-sfix-2-vec.c new file mode 100644 index 0000000..bf48b80 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ceil-sfix-2-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK_H + +#include <math.h> + +extern double ceil (double); + +#define NUM 4 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + double a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) ceil (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) ceil (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-floor-sfix-2-vec.c b/gcc/testsuite/gcc.target/i386/avx-floor-sfix-2-vec.c new file mode 100644 index 0000000..275199c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-floor-sfix-2-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK_H + +#include <math.h> + +extern double floor (double); + +#define NUM 4 + +static void 
+__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + double a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) floor (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) floor (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-rint-sfix-2-vec.c b/gcc/testsuite/gcc.target/i386/avx-rint-sfix-2-vec.c new file mode 100644 index 0000000..9f273af --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-rint-sfix-2-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK_H + +#include <math.h> + +extern double rint (double); + +#define NUM 4 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + double a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) rint (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) rint (a[i])) + abort(); +} 
diff --git a/gcc/testsuite/gcc.target/i386/avx-rint-sfix-vec.c b/gcc/testsuite/gcc.target/i386/avx-rint-sfix-vec.c new file mode 100644 index 0000000..824f2eb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-rint-sfix-vec.c @@ -0,0 +1,9 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse4_1-rint-sfix-vec.c" diff --git a/gcc/testsuite/gcc.target/i386/avx-rintf-sfix-vec.c b/gcc/testsuite/gcc.target/i386/avx-rintf-sfix-vec.c new file mode 100644 index 0000000..e5ddf790 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-rintf-sfix-vec.c @@ -0,0 +1,9 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse4_1-rintf-sfix-vec.c" diff --git a/gcc/testsuite/gcc.target/i386/avx-round-sfix-2-vec.c b/gcc/testsuite/gcc.target/i386/avx-round-sfix-2-vec.c new file mode 100644 index 0000000..ddb46d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-round-sfix-2-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK_H + +#include <math.h> + +extern double round (double); + +#define NUM 4 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + 
f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + double a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) round (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) round (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-rint-sfix-vec.c b/gcc/testsuite/gcc.target/i386/sse4_1-rint-sfix-vec.c new file mode 100644 index 0000000..d9c2fbf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-rint-sfix-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <math.h> + +extern double rint (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + double a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) rint (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) rint (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-rintf-sfix-vec.c b/gcc/testsuite/gcc.target/i386/sse4_1-rintf-sfix-vec.c new file mode 100644 index 0000000..1d25f76 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-rintf-sfix-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 
-ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <math.h> + +extern float rintf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + float a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) rintf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) rintf (a[i])) + abort(); +} |