aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2019-12-29 12:03:25 +0100
committer Jakub Jelinek <jakub@gcc.gnu.org> 2019-12-29 12:03:25 +0100
commit 6ec067548fa994158819db0a62a8b5356d452c2c (patch)
tree dc06b5ffaabf62e05636915869eaa80541497636 /gcc/config
parent f0657516067909d688dc2424a876c1c894cbc182 (diff)
download gcc-6ec067548fa994158819db0a62a8b5356d452c2c.zip
gcc-6ec067548fa994158819db0a62a8b5356d452c2c.tar.gz
gcc-6ec067548fa994158819db0a62a8b5356d452c2c.tar.bz2
re PR target/93078 (Missing fma and round functions auto-vectorization with x86-64 (sse2))
PR target/93078
* config/i386/i386-builtins.c (ix86_builtin_vectorized_function): Remove CASE_CFN_RINT handling.
* config/i386/i386-builtin.def (IX86_BUILTIN_RINTPD, IX86_BUILTIN_RINTPS, IX86_BUILTIN_RINTPD256, IX86_BUILTIN_RINTPS256): Remove.
* config/i386/sse.md (nearbyint<mode>2, rint<mode>2): New expanders with VF iterator.
* gcc.target/i386/sse4_1-pr93078.c: New test.
* gcc.target/i386/avx-pr93078.c: New test.
* gcc.target/i386/avx512f-pr93078.c: New test.
From-SVN: r279754
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/i386/i386-builtin.def 4
-rw-r--r-- gcc/config/i386/i386-builtins.c 21
-rw-r--r-- gcc/config/i386/sse.md 18
3 files changed, 18 insertions, 25 deletions
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index a6500f9..fd9c272 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -913,7 +913,6 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundss, "__builtin_ia32_round
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND)
@@ -924,7 +923,6 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND)
@@ -1047,7 +1045,6 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps2
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF)
@@ -1058,7 +1055,6 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND)
diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c
index 4646d044..1d4db2b 100644
--- a/gcc/config/i386/i386-builtins.c
+++ b/gcc/config/i386/i386-builtins.c
@@ -1661,27 +1661,6 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
}
break;
- CASE_CFN_RINT:
- /* The round insn does not trap on denormals. */
- if (flag_trapping_math || !TARGET_SSE4_1)
- break;
-
- if (out_mode == DFmode && in_mode == DFmode)
- {
- if (out_n == 2 && in_n == 2)
- return ix86_get_builtin (IX86_BUILTIN_RINTPD);
- else if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
- }
- if (out_mode == SFmode && in_mode == SFmode)
- {
- if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_RINTPS);
- else if (out_n == 8 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
- }
- break;
-
CASE_CFN_FMA:
if (out_mode == DFmode && in_mode == DFmode)
{
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index bbceb8b..b3ef215 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17977,6 +17977,24 @@
(set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "TI")])
+(define_expand "nearbyint<mode>2"
+ [(set (match_operand:VF 0 "register_operand")
+ (unspec:VF
+ [(match_operand:VF 1 "vector_operand")
+ (match_dup 2)]
+ UNSPEC_ROUND))]
+ "TARGET_SSE4_1"
+ "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);")
+
+(define_expand "rint<mode>2"
+ [(set (match_operand:VF 0 "register_operand")
+ (unspec:VF
+ [(match_operand:VF 1 "vector_operand")
+ (match_dup 2)]
+ UNSPEC_ROUND))]
+ "TARGET_SSE4_1"
+ "operands[2] = GEN_INT (ROUND_MXCSR);")
+
(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
[(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
(unspec:VF_128_256