aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Ivchenko <alexander.ivchenko@intel.com>2013-12-31 12:13:49 +0000
committerKirill Yukhin <kyukhin@gcc.gnu.org>2013-12-31 12:13:49 +0000
commit075691af6b69963626c2e242661f4d39131d6b25 (patch)
tree8140e3b1d8a8c8dae84e044a60dbb110e8e89f70
parentcf3e5a89aecd15ead7177517b7f3b6e3c4c66645 (diff)
downloadgcc-075691af6b69963626c2e242661f4d39131d6b25.zip
gcc-075691af6b69963626c2e242661f4d39131d6b25.tar.gz
gcc-075691af6b69963626c2e242661f4d39131d6b25.tar.bz2
avx512fintrin.h (_mm_add_round_sd): New.
gcc/ * config/i386/avx512fintrin.h (_mm_add_round_sd): New. (_mm_add_round_sd): Ditto. (_mm_add_round_ss): Ditto. (_mm_sub_round_sd): Ditto. (_mm_sub_round_ss): Ditto. (_mm_rcp14_sd): Ditto. (_mm_rcp14_ss): Ditto. (_mm_sqrt_round_sd): Ditto. (_mm_sqrt_round_ss): Ditto. (_mm_mul_round_sd): Ditto. (_mm_mul_round_ss): Ditto. (_mm_div_round_sd): Ditto. (_mm_div_round_ss): Ditto. (_mm_scalef_round_sd): Ditto. (_mm_scalef_round_ss): Ditto. (_mm_scalef_round_sd): Ditto. (_mm_scalef_round_ss): Ditto. (_mm_cvt_roundsd_ss): Ditto. (_mm_cvt_roundsd_sd): Ditto. (_mm_getexp_round_ss): Ditto. (_mm_getexp_round_sd): Ditto. (_mm_getmant_round_sd): Ditto. (_mm_getmant_round_ss): Ditto. (_mm_roundscale_round_ss): Ditto. (_mm_roundscale_round_sd): Ditto. (_mm_max_round_sd): Ditto. (_mm_max_round_ss): Ditto. (_mm_min_round_sd): Ditto. (_mm_min_round_ss): Ditto. (_mm_fmadd_round_sd): Ditto. (_mm_fmadd_round_ss): Ditto. (_mm_fmsub_round_sd): Ditto. (_mm_fmsub_round_ss): Ditto. (_mm_fnmadd_round_sd): Ditto. (_mm_fnmadd_round_ss): Ditto. (_mm_fnmsub_round_sd): Ditto. (_mm_fnmsub_round_ss): Ditto. (_mm_scalef_sd): Ditto. (_mm_scalef_ss): Ditto. (_mm_getexp_ss): Ditto. (_mm_getexp_sd): Ditto. (_mm_getmant_sd): Ditto. (_mm_getmant_ss): Ditto. (_mm_roundscale_ss): Ditto. (_mm_roundscale_sd): Ditto. * config/i386/i386-builtin-types.def: New types to support new built-ins: <V2DF, V2DF, V2DF, INT, INT>, <V4SF, V4SF, V4SF, INT, INT>, <(V4SF, V4SF, V2DF, INT>, <V2DF, V2DF, V4SF, INT>, <V4SF, V4SF, V4SF, V4SF, IN>. 
* config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_ADDSD_ROUND, IX86_BUILTIN_ADDSS_ROUND, IX86_BUILTIN_CVTSD2SS_ROUND, IX86_BUILTIN_CVTSS2SD_ROUND, IX86_BUILTIN_DIVSD_ROUND, IX86_BUILTIN_GETEXPSD128, IX86_BUILTIN_DIVSS_ROUND, IX86_BUILTIN_GETEXPSS128, IX86_BUILTIN_GETMANTSD128, IX86_BUILTIN_GETMANTSS128, IX86_BUILTIN_MAXSD_ROUND, IX86_BUILTIN_MAXSS_ROUND, IX86_BUILTIN_MINSD_ROUND, IX86_BUILTIN_MINSS_ROUND, IX86_BUILTIN_MULSD_ROUND, IX86_BUILTIN_MULSS_ROUND, IX86_BUILTIN_RCP14SD, IX86_BUILTIN_RCP14SS, IX86_BUILTIN_RNDSCALESD, IX86_BUILTIN_RNDSCALESS, IX86_BUILTIN_RSQRT14SD, IX86_BUILTIN_RSQRT14SS, IX86_BUILTIN_SCALEFSD, IX86_BUILTIN_SCALEFSS, IX86_BUILTIN_SQRTSD_ROUND, IX86_BUILTIN_SQRTSS_ROUND, IX86_BUILTIN_SUBSD_ROUND, IX86_BUILTIN_SUBSS_ROUND, IX86_BUILTIN_VFMADDSD3_ROUND, IX86_BUILTIN_VFMADDSS3_ROUND, IX86_BUILTIN_VFMSUBSD3_MASK3, IX86_BUILTIN_VFMSUBSS3_MASK3. (builtin_description bdesc_args[]): Add __builtin_ia32_rcp14sd, __builtin_ia32_rcp14ss, __builtin_ia32_rsqrt14pd512_mask, __builtin_ia32_rsqrt14ps512_mask, __builtin_ia32_rsqrt14sd, __builtin_ia32_rsqrt14ss, __builtin_ia32_addsd_round, __builtin_ia32_addss_round, __builtin_ia32_cvtsd2ss_round, __builtin_ia32_cvtss2sd_round, __builtin_ia32_divsd_round, __builtin_ia32_divss_round, __builtin_ia32_getexpsd128_round, __builtin_ia32_getexpss128_round, __builtin_ia32_getmantsd_round, __builtin_ia32_getmantss_round, __builtin_ia32_maxsd_round, __builtin_ia32_maxss_round, __builtin_ia32_minsd_round, __builtin_ia32_minss_round, __builtin_ia32_mulsd_round, __builtin_ia32_mulss_round, __builtin_ia32_rndscalesd_round, __builtin_ia32_rndscaless_round, __builtin_ia32_scalefsd_round, __builtin_ia32_scalefss_round, __builtin_ia32_sqrtsd_round, __builtin_ia32_sqrtss_round, __builtin_ia32_subsd_round, __builtin_ia32_subss_round, __builtin_ia32_vfmaddsd3_round, __builtin_ia32_vfmaddss3_round. (ix86_expand_round_builtin): Expand new FTYPEs. 
* config/i386/sse.md (<sse>_vm<plusminus_insn><mode>3): Support EVEX's embedded rouding. (<sse>_vm<multdiv_mnemonic><mode>3): Ditto. (<sse>_vmsqrt<mode>2): Ditto. (<sse>_vm<code><mode>3): Ditto. (sse2_cvtsd2ss): Ditto. (sse2_cvtss2sd): Ditto. (*avx512f_vmscalef<mode>): Ditto. (avx512f_sgetexp<mode>): Ditto. (*avx512f_rndscale<mode>): Ditto. (avx512f_getmant<mode>): Ditto. (*srcp14<mode>): Make visible. (*rsqrt14<mode>): Ditto. * config/i386/subst.md (mask_mode512bit_condition): Fix mode calculation. (sd_mask_mode512bit_condition): Ditto. (round_mode512bit_condition): Ditto. (round_modev4sf_condition): Ditto. (round_mask_scalar_operand3): Remove. (round_prefix): New. (round_saeonly_op3): Ditto. (round_saeonly_prefix): Ditto. testsuite/ * gcc.target/i386/avx-1.c: Update for AVX-512 scalar insns. * gcc.target/i386/avx512f-vaddsd-1.c: New. * gcc.target/i386/avx512f-vaddss-1.c: Ditto. * gcc.target/i386/avx512f-vcvtsd2ss-1.c: Ditto. * gcc.target/i386/avx512f-vcvtss2sd-1.c: Ditto. * gcc.target/i386/avx512f-vdivsd-1.c: Ditto. * gcc.target/i386/avx512f-vdivss-1.c: Ditto. * gcc.target/i386/avx512f-vextractf32x4-2.c: Ditto. * gcc.target/i386/avx512f-vextracti32x4-2.c: Ditto. * gcc.target/i386/avx512f-vfmaddXXXsd-1.c: Ditto. * gcc.target/i386/avx512f-vfmaddXXXss-1.c: Ditto. * gcc.target/i386/avx512f-vfmsubXXXsd-1.c: Ditto. * gcc.target/i386/avx512f-vfmsubXXXss-1.c: Ditto. * gcc.target/i386/avx512f-vfnmaddXXXsd-1.c: Ditto. * gcc.target/i386/avx512f-vfnmaddXXXss-1.c: Ditto. * gcc.target/i386/avx512f-vfnmsubXXXsd-1.c: Ditto. * gcc.target/i386/avx512f-vfnmsubXXXss-1.c: Ditto. * gcc.target/i386/avx512f-vgetexpsd-1.c: Ditto. * gcc.target/i386/avx512f-vgetexpsd-2.c: Ditto. * gcc.target/i386/avx512f-vgetexpss-1.c: Ditto. * gcc.target/i386/avx512f-vgetexpss-2.c: Ditto. * gcc.target/i386/avx512f-vgetmantsd-1.c: Ditto. * gcc.target/i386/avx512f-vgetmantsd-2.c: Ditto. * gcc.target/i386/avx512f-vgetmantss-1.c: Ditto. * gcc.target/i386/avx512f-vgetmantss-2.c: Ditto. 
* gcc.target/i386/avx512f-vmaxsd-1.c: Ditto. * gcc.target/i386/avx512f-vmaxss-1.c: Ditto. * gcc.target/i386/avx512f-vminsd-1.c: Ditto. * gcc.target/i386/avx512f-vminss-1.c: Ditto. * gcc.target/i386/avx512f-vmulsd-1.c: Ditto. * gcc.target/i386/avx512f-vmulss-1.c: Ditto. * gcc.target/i386/avx512f-vrcp14sd-1.c: Ditto. * gcc.target/i386/avx512f-vrcp14sd-2.c: Ditto. * gcc.target/i386/avx512f-vrcp14ss-1.c: Ditto. * gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto. * gcc.target/i386/avx512f-vrndscalesd-1.c: Ditto. * gcc.target/i386/avx512f-vrndscalesd-2.c: Ditto. * gcc.target/i386/avx512f-vrndscaless-1.c: Ditto. * gcc.target/i386/avx512f-vrndscaless-2.c: Ditto. * gcc.target/i386/avx512f-vrsqrt14sd-1.c: Ditto. * gcc.target/i386/avx512f-vrsqrt14sd-2.c: Ditto. * gcc.target/i386/avx512f-vrsqrt14ss-1.c: Ditto. * gcc.target/i386/avx512f-vrsqrt14ss-2.c: Ditto. * gcc.target/i386/avx512f-vscalefsd-1.c: Ditto. * gcc.target/i386/avx512f-vscalefsd-2.c: Ditto. * gcc.target/i386/avx512f-vscalefss-1.c: Ditto. * gcc.target/i386/avx512f-vscalefss-2.c: Ditto. * gcc.target/i386/avx512f-vsqrtsd-1.c: Ditto. * gcc.target/i386/avx512f-vsqrtss-1.c: Ditto. * gcc.target/i386/avx512f-vsubsd-1.c: Ditto. * gcc.target/i386/avx512f-vsubss-1.c: Ditto. * gcc.target/i386/sse-14.c: Update for AVX-512 scalar insns. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/testimm-10.c: Ditto. Co-Authored-By: Andrey Turetskiy <andrey.turetskiy@intel.com> Co-Authored-By: Anna Tikhonova <anna.tikhonova@intel.com> Co-Authored-By: Ilya Tocar <ilya.tocar@intel.com> Co-Authored-By: Ilya Verbin <ilya.verbin@intel.com> Co-Authored-By: Kirill Yukhin <kirill.yukhin@intel.com> Co-Authored-By: Maxim Kuznetsov <maxim.kuznetsov@intel.com> Co-Authored-By: Michael Zolotukhin <michael.v.zolotukhin@intel.com> Co-Authored-By: Sergey Lega <sergey.s.lega@intel.com> From-SVN: r206265
-rw-r--r--gcc/ChangeLog117
-rw-r--r--gcc/config/i386/avx512fintrin.h543
-rw-r--r--gcc/config/i386/i386-builtin-types.def5
-rw-r--r--gcc/config/i386/i386.c75
-rw-r--r--gcc/config/i386/sse.md78
-rw-r--r--gcc/config/i386/subst.md12
-rw-r--r--gcc/testsuite/ChangeLog65
-rw-r--r--gcc/testsuite/gcc.target/i386/avx-1.c38
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vaddsd-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vaddss-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2ss-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtss2sd-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vdivsd-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vdivss-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vextractf32x4-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vextracti32x4-2.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-2.c36
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-2.c36
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-2.c94
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-2.c99
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmulsd-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmulss-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c34
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c37
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c39
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsubsd-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsubss-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-14.c46
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-23.c34
-rw-r--r--gcc/testsuite/gcc.target/i386/testimm-10.c11
-rw-r--r--gcc/testsuite/gcc.target/i386/testround-1.c63
62 files changed, 2198 insertions, 65 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 73c4762..2ffd959 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -16,6 +16,123 @@
Kirill Yukhin <kirill.yukhin@intel.com>
Michael Zolotukhin <michael.v.zolotukhin@intel.com>
+ * config/i386/avx512fintrin.h (_mm_add_round_sd): New.
+ (_mm_add_round_sd): Ditto.
+ (_mm_add_round_ss): Ditto.
+ (_mm_sub_round_sd): Ditto.
+ (_mm_sub_round_ss): Ditto.
+ (_mm_rcp14_sd): Ditto.
+ (_mm_rcp14_ss): Ditto.
+ (_mm_sqrt_round_sd): Ditto.
+ (_mm_sqrt_round_ss): Ditto.
+ (_mm_mul_round_sd): Ditto.
+ (_mm_mul_round_ss): Ditto.
+ (_mm_div_round_sd): Ditto.
+ (_mm_div_round_ss): Ditto.
+ (_mm_scalef_round_sd): Ditto.
+ (_mm_scalef_round_ss): Ditto.
+ (_mm_scalef_round_sd): Ditto.
+ (_mm_scalef_round_ss): Ditto.
+ (_mm_cvt_roundsd_ss): Ditto.
+ (_mm_cvt_roundss_sd): Ditto.
+ (_mm_getexp_round_ss): Ditto.
+ (_mm_getexp_round_sd): Ditto.
+ (_mm_getmant_round_sd): Ditto.
+ (_mm_getmant_round_ss): Ditto.
+ (_mm_roundscale_round_ss): Ditto.
+ (_mm_roundscale_round_sd): Ditto.
+ (_mm_max_round_sd): Ditto.
+ (_mm_max_round_ss): Ditto.
+ (_mm_min_round_sd): Ditto.
+ (_mm_min_round_ss): Ditto.
+ (_mm_fmadd_round_sd): Ditto.
+ (_mm_fmadd_round_ss): Ditto.
+ (_mm_fmsub_round_sd): Ditto.
+ (_mm_fmsub_round_ss): Ditto.
+ (_mm_fnmadd_round_sd): Ditto.
+ (_mm_fnmadd_round_ss): Ditto.
+ (_mm_fnmsub_round_sd): Ditto.
+ (_mm_fnmsub_round_ss): Ditto.
+ (_mm_scalef_sd): Ditto.
+ (_mm_scalef_ss): Ditto.
+ (_mm_getexp_ss): Ditto.
+ (_mm_getexp_sd): Ditto.
+ (_mm_getmant_sd): Ditto.
+ (_mm_getmant_ss): Ditto.
+ (_mm_roundscale_ss): Ditto.
+ (_mm_roundscale_sd): Ditto.
+ * config/i386/i386-builtin-types.def: New types to support
+ new built-ins: <V2DF, V2DF, V2DF, INT, INT>, <V4SF, V4SF, V4SF, INT, INT>,
+ <V4SF, V4SF, V2DF, INT>, <V2DF, V2DF, V4SF, INT>,
+ <V4SF, V4SF, V4SF, V4SF, INT>.
+ * config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_ADDSD_ROUND,
+ IX86_BUILTIN_ADDSS_ROUND, IX86_BUILTIN_CVTSD2SS_ROUND,
+ IX86_BUILTIN_CVTSS2SD_ROUND, IX86_BUILTIN_DIVSD_ROUND,
+ IX86_BUILTIN_GETEXPSD128, IX86_BUILTIN_DIVSS_ROUND,
+ IX86_BUILTIN_GETEXPSS128, IX86_BUILTIN_GETMANTSD128,
+ IX86_BUILTIN_GETMANTSS128, IX86_BUILTIN_MAXSD_ROUND,
+ IX86_BUILTIN_MAXSS_ROUND, IX86_BUILTIN_MINSD_ROUND,
+ IX86_BUILTIN_MINSS_ROUND, IX86_BUILTIN_MULSD_ROUND,
+ IX86_BUILTIN_MULSS_ROUND, IX86_BUILTIN_RCP14SD,
+ IX86_BUILTIN_RCP14SS, IX86_BUILTIN_RNDSCALESD,
+ IX86_BUILTIN_RNDSCALESS, IX86_BUILTIN_RSQRT14SD,
+ IX86_BUILTIN_RSQRT14SS, IX86_BUILTIN_SCALEFSD,
+ IX86_BUILTIN_SCALEFSS, IX86_BUILTIN_SQRTSD_ROUND,
+ IX86_BUILTIN_SQRTSS_ROUND, IX86_BUILTIN_SUBSD_ROUND,
+ IX86_BUILTIN_SUBSS_ROUND, IX86_BUILTIN_VFMADDSD3_ROUND,
+ IX86_BUILTIN_VFMADDSS3_ROUND, IX86_BUILTIN_VFMSUBSD3_MASK3,
+ IX86_BUILTIN_VFMSUBSS3_MASK3.
+ (builtin_description bdesc_args[]): Add
+ __builtin_ia32_rcp14sd, __builtin_ia32_rcp14ss,
+ __builtin_ia32_rsqrt14pd512_mask, __builtin_ia32_rsqrt14ps512_mask,
+ __builtin_ia32_rsqrt14sd, __builtin_ia32_rsqrt14ss,
+ __builtin_ia32_addsd_round, __builtin_ia32_addss_round,
+ __builtin_ia32_cvtsd2ss_round, __builtin_ia32_cvtss2sd_round,
+ __builtin_ia32_divsd_round, __builtin_ia32_divss_round,
+ __builtin_ia32_getexpsd128_round, __builtin_ia32_getexpss128_round,
+ __builtin_ia32_getmantsd_round, __builtin_ia32_getmantss_round,
+ __builtin_ia32_maxsd_round, __builtin_ia32_maxss_round,
+ __builtin_ia32_minsd_round, __builtin_ia32_minss_round,
+ __builtin_ia32_mulsd_round, __builtin_ia32_mulss_round,
+ __builtin_ia32_rndscalesd_round, __builtin_ia32_rndscaless_round,
+ __builtin_ia32_scalefsd_round, __builtin_ia32_scalefss_round,
+ __builtin_ia32_sqrtsd_round, __builtin_ia32_sqrtss_round,
+ __builtin_ia32_subsd_round, __builtin_ia32_subss_round,
+ __builtin_ia32_vfmaddsd3_round, __builtin_ia32_vfmaddss3_round.
+ (ix86_expand_round_builtin): Expand new FTYPEs.
+ * config/i386/sse.md (<sse>_vm<plusminus_insn><mode>3): Support
+ EVEX's embedded rounding.
+ (<sse>_vm<multdiv_mnemonic><mode>3): Ditto.
+ (<sse>_vmsqrt<mode>2): Ditto.
+ (<sse>_vm<code><mode>3): Ditto.
+ (sse2_cvtsd2ss): Ditto.
+ (sse2_cvtss2sd): Ditto.
+ (*avx512f_vmscalef<mode>): Ditto.
+ (avx512f_sgetexp<mode>): Ditto.
+ (*avx512f_rndscale<mode>): Ditto.
+ (avx512f_getmant<mode>): Ditto.
+ (*srcp14<mode>): Make visible.
+ (*rsqrt14<mode>): Ditto.
+ * config/i386/subst.md (mask_mode512bit_condition): Fix
+ mode calculation.
+ (sd_mask_mode512bit_condition): Ditto.
+ (round_mode512bit_condition): Ditto.
+ (round_modev4sf_condition): Ditto.
+ (round_mask_scalar_operand3): Remove.
+ (round_prefix): New.
+ (round_saeonly_op3): Ditto.
+ (round_saeonly_prefix): Ditto.
+
+2013-12-31 Alexander Ivchenko <alexander.ivchenko@intel.com>
+ Maxim Kuznetsov <maxim.kuznetsov@intel.com>
+ Sergey Lega <sergey.s.lega@intel.com>
+ Anna Tikhonova <anna.tikhonova@intel.com>
+ Ilya Tocar <ilya.tocar@intel.com>
+ Andrey Turetskiy <andrey.turetskiy@intel.com>
+ Ilya Verbin <ilya.verbin@intel.com>
+ Kirill Yukhin <kirill.yukhin@intel.com>
+ Michael Zolotukhin <michael.v.zolotukhin@intel.com>
+
* common/config/i386/i386-common.c (OPTION_MASK_ISA_SHA_SET): New.
(OPTION_MASK_ISA_SHA_UNSET): Ditto.
(ix86_handle_option): Handle OPT_msha.
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index f717d46..40e8213 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -1279,6 +1279,57 @@ _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
}
#ifdef __OPTIMIZE__
+/* Cast __A and __B to __v2df, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_addsd_round, and return the result as
+   __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+/* Cast __A and __B to __v4sf, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_addss_round, and return the result as
+   __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+/* Cast __A and __B to __v2df, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_subsd_round, and return the result as
+   __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+/* Cast __A and __B to __v4sf, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_subss_round, and return the result as
+   __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+#else
+/* Macro forms of the scalar add/sub round intrinsics, used when
+   __OPTIMIZE__ is not defined.  Each expands to a direct call of the
+   matching builtin with A, B and C passed through as written (no argument
+   casts); only the result is cast.  */
+#define _mm_add_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_addsd_round(A, B, C)
+
+#define _mm_add_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_addss_round(A, B, C)
+
+#define _mm_sub_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_subsd_round(A, B, C)
+
+#define _mm_sub_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_subss_round(A, B, C)
+#endif
+
+#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
@@ -1424,6 +1475,22 @@ _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
(__mmask16) __U);
}
+/* Cast __A and __B to __v2df, pass them to __builtin_ia32_rcp14sd, and
+   return the result as __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp14_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A,
+ (__v2df) __B);
+}
+
+/* Cast __A and __B to __v4sf, pass them to __builtin_ia32_rcp14ss, and
+   return the result as __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp14_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A,
+ (__v4sf) __B);
+}
+
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_pd (__m512d __A)
@@ -1482,6 +1549,22 @@ _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
(__mmask16) __U);
}
+/* Cast __A and __B to __v2df, pass them to __builtin_ia32_rsqrt14sd, and
+   return the result as __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt14_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A,
+ (__v2df) __B);
+}
+
+/* Cast __A and __B to __v4sf, pass them to __builtin_ia32_rsqrt14ss, and
+   return the result as __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt14_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A,
+ (__v4sf) __B);
+}
+
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -1542,6 +1625,23 @@ _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
(__mmask16) __U, __R);
}
+/* Call __builtin_ia32_sqrtsd_round with the operands swapped relative to
+   the parameter list: __B is passed first, then __A, then __R.  Both
+   vectors are cast to __v2df and the result is returned as __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
+ (__v2df) __A,
+ __R);
+}
+
+/* Call __builtin_ia32_sqrtss_round with the operands swapped relative to
+   the parameter list: __B is passed first, then __A, then __R.  Both
+   vectors are cast to __v4sf and the result is returned as __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
+ (__v4sf) __A,
+ __R);
+}
#else
#define _mm512_sqrt_round_pd(A, C) \
(__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
@@ -1560,6 +1660,12 @@ _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
#define _mm512_maskz_sqrt_round_ps(U, A, C) \
(__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
+
+/* Macro forms of the scalar sqrt round intrinsics.  The builtin is given
+   B before A, the same operand order the inline versions of these
+   intrinsics use, so both forms produce the same result.  C is passed
+   through unchanged.  */
+#define _mm_sqrt_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_sqrtsd_round(B, A, C)
+
+#define _mm_sqrt_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_sqrtss_round(B, A, C)
#endif
extern __inline __m512i
@@ -2159,6 +2265,42 @@ _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
(__mmask16) __U, __R);
}
+/* Cast __A and __B to __v2df, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_mulsd_round, and return the result as
+   __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+/* Cast __A and __B to __v4sf, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_mulss_round, and return the result as
+   __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+/* Cast __A and __B to __v2df, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_divsd_round, and return the result as
+   __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+/* Cast __A and __B to __v4sf, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_divss_round, and return the result as
+   __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
#else
#define _mm512_mul_round_pd(A, B, C) \
(__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
@@ -2195,6 +2337,18 @@ _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
#define _mm512_maskz_div_round_ps(U, A, B, C) \
(__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
+
+/* Macro forms of the scalar mul/div round intrinsics.  Each expands to a
+   direct call of the matching builtin with A, B and C passed through as
+   written (no argument casts); only the result is cast.  */
+#define _mm_mul_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_mulsd_round(A, B, C)
+
+#define _mm_mul_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_mulss_round(A, B, C)
+
+#define _mm_div_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_divsd_round(A, B, C)
+
+#define _mm_div_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_divss_round(A, B, C)
#endif
#ifdef __OPTIMIZE__
@@ -2438,6 +2592,23 @@ _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
(__mmask16) __U, __R);
}
+/* Cast __A and __B to __v2df, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_scalefsd_round, and return the result as
+   __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+/* Cast __A and __B to __v4sf, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_scalefss_round, and return the result as
+   __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
#else
#define _mm512_scalef_round_pd(A, B, C) \
(__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
@@ -2456,6 +2627,12 @@ _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
(__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
+
+/* Macro forms of the scalar scalef round intrinsics.  Each expands to a
+   direct call of the matching builtin with A, B and C passed through as
+   written (no argument casts); only the result is cast.  */
+#define _mm_scalef_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_scalefsd_round(A, B, C)
+
+#define _mm_scalef_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_scalefss_round(A, B, C)
#endif
#ifdef __OPTIMIZE__
@@ -7578,6 +7755,23 @@ _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
(__mmask8) __U, __R);
}
+/* Pass __A (cast to __v4sf), __B (cast to __v2df) and __R to
+   __builtin_ia32_cvtsd2ss_round and return the result as __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
+ (__v2df) __B,
+ __R);
+}
+
+/* Pass __A (cast to __v2df), __B (cast to __v4sf) and __R to
+   __builtin_ia32_cvtss2sd_round and return the result as __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
+ (__v4sf) __B,
+ __R);
+}
#else
#define _mm512_cvt_roundpd_ps(A, B) \
(__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), -1, B)
@@ -7587,6 +7781,12 @@ _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
(__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
+
+/* Macro forms of the scalar double<->single round conversions.  Each
+   expands to a direct call of the matching builtin with A, B and C passed
+   through as written (no argument casts); only the result is cast.  */
+#define _mm_cvt_roundsd_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
+
+#define _mm_cvt_roundss_sd(A, B, C) \
+ (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
#endif
extern __inline void
@@ -7611,6 +7811,24 @@ _mm512_stream_pd (double *__P, __m512d __A)
}
#ifdef __OPTIMIZE__
+/* Cast __A and __B to __v4sf, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_getexpss128_round, and return the result as
+   __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+/* Cast __A and __B to __v2df, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_getexpsd128_round, and return the result as
+   __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_round_ps (__m512 __A, const int __R)
@@ -7759,6 +7977,30 @@ _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
__U, __R);
}
+/* Call __builtin_ia32_getmantsd_round with __A and __B cast to __v2df.
+   The two enum arguments are packed into a single integer operand as
+   (__D << 2) | __C, and __R is passed through unchanged as the last
+   argument.  The result is returned as __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_round_sd (__m128d __A, __m128d __B,
+ _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D, const int __R)
+{
+ return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ __R);
+}
+
+/* Call __builtin_ia32_getmantss_round with __A and __B cast to __v4sf.
+   The two enum arguments are packed into a single integer operand as
+   (__D << 2) | __C, and __R is passed through unchanged as the last
+   argument.  The result is returned as __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_round_ss (__m128 __A, __m128 __B,
+ _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D, const int __R)
+{
+ return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ __R);
+}
+
#else
#define _mm512_getmant_round_pd(X, B, C, R) \
((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
@@ -7800,6 +8042,24 @@ _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
(__v16sf)(__m512)_mm512_setzero_ps(), \
(__mmask16)(U),\
(R)))
+/* Macro forms of the scalar getmant round intrinsics.  X and Y are cast
+   to the vector type the builtin takes, C and D are packed into one int
+   operand as ((D)<<2) | (C), and R is passed through as the last
+   argument.  */
+#define _mm_getmant_round_sd(X, Y, C, D, R) \
+ ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (R)))
+
+#define _mm_getmant_round_ss(X, Y, C, D, R) \
+ ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (R)))
+
+/* Macro forms of the scalar getexp round intrinsics.  A and B are cast to
+   the vector type the builtin takes; R is passed through as written.  */
+#define _mm_getexp_round_ss(A, B, R) \
+ ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
+
+#define _mm_getexp_round_sd(A, B, R) \
+ ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
+
#define _mm512_getexp_round_ps(A, R) \
((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), (__mmask16)-1, R))
@@ -7885,6 +8145,24 @@ _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
_mm512_setzero_pd (),
(__mmask8) __A, __R);
}
+
+/* Cast __A and __B to __v4sf and pass them, followed by __imm and __R
+   (both unchanged), to __builtin_ia32_rndscaless_round.  The result is
+   returned as __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
+{
+ return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
+ (__v4sf) __B, __imm, __R);
+}
+
+/* Cast __A and __B to __v2df and pass them, followed by __imm and __R
+   (both unchanged), to __builtin_ia32_rndscalesd_round.  The result is
+   returned as __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
+ (__v2df) __B, __imm, __R);
+}
+
#else
#define _mm512_roundscale_round_ps(A, B, R) \
((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
@@ -7912,6 +8190,12 @@ _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
(int)(C), \
(__v8df)_mm512_setzero_pd(),\
(__mmask8)(A), R))
+/* Macro forms of the scalar roundscale round intrinsics.  A and B are
+   cast to the vector type the builtin takes, C is cast to int, and R is
+   passed through as written.  */
+#define _mm_roundscale_round_ss(A, B, C, R) \
+ ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), R))
+#define _mm_roundscale_round_sd(A, B, C, R) \
+ ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), R))
#endif
extern __inline __m512
@@ -9825,6 +10109,57 @@ _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
(__mmask16) __U);
}
+#ifdef __OPTIMIZE__
+/* Cast __A and __B to __v2df, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_maxsd_round, and return the result as
+   __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+/* Cast __A and __B to __v4sf, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_maxss_round, and return the result as
+   __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+/* Cast __A and __B to __v2df, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_minsd_round, and return the result as
+   __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+/* Cast __A and __B to __v4sf, pass them with __R (unchanged, as the last
+   argument) to __builtin_ia32_minss_round, and return the result as
+   __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+#else
+/* Macro forms of the scalar max/min round intrinsics, used when
+   __OPTIMIZE__ is not defined.  Each must expand to the same builtin
+   that the inline version of the same name calls (maxsd/maxss/minsd/
+   minss), with A, B and C passed through as written.  */
+#define _mm_max_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_maxsd_round(A, B, C)
+
+#define _mm_max_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_maxss_round(A, B, C)
+
+#define _mm_min_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_minsd_round(A, B, C)
+
+#define _mm_min_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_minss_round(A, B, C)
+#endif
+
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
@@ -9862,6 +10197,112 @@ _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
}
#ifdef __OPTIMIZE__
+/* _mm_fmadd_round_sd: inline form, compiled only when __OPTIMIZE__ is
+   defined.  Casts __W, __A and __B to __v2df and forwards them, in that
+   order and with __R unchanged, to __builtin_ia32_vfmaddsd3_round; the
+   result is returned as __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+/* _mm_fmadd_round_ss: inline form, compiled only when __OPTIMIZE__ is
+   defined.  Casts __W, __A and __B to __v4sf and forwards them, in that
+   order and with __R unchanged, to __builtin_ia32_vfmaddss3_round; the
+   result is returned as __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+/* _mm_fmsub_round_sd: inline form, compiled only when __OPTIMIZE__ is
+   defined.  Reuses __builtin_ia32_vfmaddsd3_round rather than a
+   dedicated fmsub builtin: the third vector operand is passed negated,
+   i.e. the call receives __W, __A, -__B (all as __v2df) and __R.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
+ (__v2df) __A,
+ -(__v2df) __B,
+ __R);
+}
+
+/* _mm_fmsub_round_ss: inline form, compiled only when __OPTIMIZE__ is
+   defined.  Reuses __builtin_ia32_vfmaddss3_round rather than a
+   dedicated fmsub builtin: the third vector operand is passed negated,
+   i.e. the call receives __W, __A, -__B (all as __v4sf) and __R.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
+ (__v4sf) __A,
+ -(__v4sf) __B,
+ __R);
+}
+
+/* _mm_fnmadd_round_sd: inline form, compiled only when __OPTIMIZE__ is
+   defined.  Reuses __builtin_ia32_vfmaddsd3_round with the second
+   vector operand negated: the call receives __W, -__A, __B (all as
+   __v2df) and __R.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+/* _mm_fnmadd_round_ss: inline form, compiled only when __OPTIMIZE__ is
+   defined.  Reuses __builtin_ia32_vfmaddss3_round with the second
+   vector operand negated: the call receives __W, -__A, __B (all as
+   __v4sf) and __R.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+/* _mm_fnmsub_round_sd: inline form, compiled only when __OPTIMIZE__ is
+   defined.  Reuses __builtin_ia32_vfmaddsd3_round with both the second
+   and third vector operands negated: the call receives __W, -__A, -__B
+   (all as __v2df) and __R.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B,
+ __R);
+}
+
+/* _mm_fnmsub_round_ss: inline form, compiled only when __OPTIMIZE__ is
+   defined.  Reuses __builtin_ia32_vfmaddss3_round with both the second
+   and third vector operands negated: the call receives __W, -__A, -__B
+   (all as __v4sf) and __R.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B,
+ __R);
+}
+#else
+/* Macro forms of the scalar FMA intrinsics, used when __OPTIMIZE__ is not
+   defined.  All eight expand to the vfmadd builtin; the fmsub, fnmadd and
+   fnmsub variants negate the third, the second, or both of those vector
+   operands, exactly as the inline functions of the same names do.
+   Arguments are cast to the builtin's vector type before use (and before
+   negation), and each expansion is fully parenthesized so that it
+   behaves as a single expression wherever the inline form would.  */
+#define _mm_fmadd_round_sd(A, B, C, R) \
+ ((__m128d)__builtin_ia32_vfmaddsd3_round((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (__v2df)(__m128d)(C), (R)))
+
+#define _mm_fmadd_round_ss(A, B, C, R) \
+ ((__m128)__builtin_ia32_vfmaddss3_round((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (__v4sf)(__m128)(C), (R)))
+
+#define _mm_fmsub_round_sd(A, B, C, R) \
+ ((__m128d)__builtin_ia32_vfmaddsd3_round((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), (R)))
+
+#define _mm_fmsub_round_ss(A, B, C, R) \
+ ((__m128)__builtin_ia32_vfmaddss3_round((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), -(__v4sf)(__m128)(C), (R)))
+
+#define _mm_fnmadd_round_sd(A, B, C, R) \
+ ((__m128d)__builtin_ia32_vfmaddsd3_round((__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), (__v2df)(__m128d)(C), (R)))
+
+#define _mm_fnmadd_round_ss(A, B, C, R) \
+ ((__m128)__builtin_ia32_vfmaddss3_round((__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), (__v4sf)(__m128)(C), (R)))
+
+#define _mm_fnmsub_round_sd(A, B, C, R) \
+ ((__m128d)__builtin_ia32_vfmaddsd3_round((__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), (R)))
+
+#define _mm_fnmsub_round_ss(A, B, C, R) \
+ ((__m128)__builtin_ia32_vfmaddss3_round((__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), -(__v4sf)(__m128)(C), (R)))
+#endif
+
+#ifdef __OPTIMIZE__
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
@@ -10436,6 +10877,24 @@ _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
+/* _mm_scalef_sd: casts __A and __B to __v2df and forwards them to
+   __builtin_ia32_scalefsd_round with _MM_FROUND_CUR_DIRECTION as the
+   final operand; the result is returned as __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
+ (__v2df) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* _mm_scalef_ss: casts __A and __B to __v4sf and forwards them to
+   __builtin_ia32_scalefss_round with _MM_FROUND_CUR_DIRECTION as the
+   final operand; the result is returned as __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
@@ -11784,6 +12243,24 @@ _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
+/* _mm_getexp_ss: casts __A and __B to __v4sf and forwards them to
+   __builtin_ia32_getexpss128_round with _MM_FROUND_CUR_DIRECTION as the
+   final operand; the result is returned as __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
+ (__v4sf) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* _mm_getexp_sd: casts __A and __B to __v2df and forwards them to
+   __builtin_ia32_getexpsd128_round with _MM_FROUND_CUR_DIRECTION as the
+   final operand; the result is returned as __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
+ (__v2df) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
@@ -11856,6 +12333,28 @@ _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
_MM_FROUND_CUR_DIRECTION);
}
+/* _mm_getmant_sd: combines the two selectors into a single immediate,
+   (__D << 2) | __C, i.e. the _MM_MANTISSA_SIGN_ENUM value sits above the
+   _MM_MANTISSA_NORM_ENUM value.  That immediate is forwarded with __A and
+   __B (as __v2df) and _MM_FROUND_CUR_DIRECTION to
+   __builtin_ia32_getmantsd_round; the result is returned as __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* _mm_getmant_ss: combines the two selectors into a single immediate,
+   (__D << 2) | __C, i.e. the _MM_MANTISSA_SIGN_ENUM value sits above the
+   _MM_MANTISSA_NORM_ENUM value.  That immediate is forwarded with __A and
+   __B (as __v4sf) and _MM_FROUND_CUR_DIRECTION to
+   __builtin_ia32_getmantss_round; the result is returned as __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
#else
#define _mm512_getmant_pd(X, B, C) \
((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
@@ -11897,6 +12396,26 @@ _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
(__v16sf)(__m512)_mm512_setzero_ps(), \
(__mmask16)(U),\
_MM_FROUND_CUR_DIRECTION))
+/* Macro forms of _mm_getmant_sd/_mm_getmant_ss.  X and Y are cast to the
+   builtin's vector type; the selectors are merged into one int immediate
+   as ((D) << 2) | (C), the same encoding the inline functions of the same
+   names use; _MM_FROUND_CUR_DIRECTION is passed as the final operand.  */
+#define _mm_getmant_sd(X, Y, C, D) \
+ ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_getmant_ss(X, Y, C, D) \
+ ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ _MM_FROUND_CUR_DIRECTION))
+
+/* Macro forms of _mm_getexp_ss/_mm_getexp_sd.  They must name the same
+   builtins as the inline functions, __builtin_ia32_getexp{ss,sd}128_round,
+   which are the scalar getexp builtins this patch registers; no scalar
+   *_mask getexp builtin is registered by it.  A and B are cast to the
+   builtin's vector type and _MM_FROUND_CUR_DIRECTION is the final
+   operand.  */
+#define _mm_getexp_ss(A, B) \
+ ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_getexp_sd(A, B) \
+ ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
+ _MM_FROUND_CUR_DIRECTION))
+
#define _mm512_getexp_ps(A) \
((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
@@ -11987,6 +12506,24 @@ _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
_MM_FROUND_CUR_DIRECTION);
}
+/* _mm_roundscale_ss: casts __A and __B to __v4sf and forwards them with
+   the immediate __imm to __builtin_ia32_rndscaless_round, using
+   _MM_FROUND_CUR_DIRECTION as the final operand; the result is returned
+   as __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
+{
+ return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
+ (__v4sf) __B, __imm,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* _mm_roundscale_sd: casts __A and __B to __v2df and forwards them with
+   the immediate __imm to __builtin_ia32_rndscalesd_round, using
+   _MM_FROUND_CUR_DIRECTION as the final operand; the result is returned
+   as __m128d.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
+{
+ return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
+ (__v2df) __B, __imm,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
#else
#define _mm512_roundscale_ps(A, B) \
((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
@@ -12014,6 +12551,12 @@ _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
(int)(C), \
(__v8df)_mm512_setzero_pd(),\
(__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
+/* Macro forms of _mm_roundscale_ss/_mm_roundscale_sd.  A and B are cast to
+   the builtin's vector type, C is cast to int and passed as the
+   immediate, and _MM_FROUND_CUR_DIRECTION is the final operand, matching
+   the calls made by the inline functions of the same names.  */
+#define _mm_roundscale_ss(A, B, C) \
+ ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
+#define _mm_roundscale_sd(A, B, C) \
+ ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
#endif
#ifdef __OPTIMIZE__
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 86ad31e..d19ca84 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -516,6 +516,7 @@ DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, INT)
DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, V16QI)
DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, DI, INT)
@@ -531,6 +532,9 @@ DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI)
DEF_FUNCTION_TYPE (V4HI, V4HI, HI, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, FLOAT, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT)
DEF_FUNCTION_TYPE (V4SI, V4SI, SI, INT)
@@ -678,6 +682,7 @@ DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, QI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, V2DF, QI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, INT, V16SF, HI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, INT, V8DF, QI, INT)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4899fdb..be0364d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -27931,6 +27931,8 @@ enum ix86_builtins
/* AVX512F */
IX86_BUILTIN_ADDPD512,
IX86_BUILTIN_ADDPS512,
+ IX86_BUILTIN_ADDSD_ROUND,
+ IX86_BUILTIN_ADDSS_ROUND,
IX86_BUILTIN_ALIGND512,
IX86_BUILTIN_ALIGNQ512,
IX86_BUILTIN_BLENDMD512,
@@ -27965,9 +27967,11 @@ enum ix86_builtins
IX86_BUILTIN_CVTPS2PD512,
IX86_BUILTIN_CVTPS2PH512,
IX86_BUILTIN_CVTPS2UDQ512,
+ IX86_BUILTIN_CVTSD2SS_ROUND,
IX86_BUILTIN_CVTSI2SD64,
IX86_BUILTIN_CVTSI2SS32,
IX86_BUILTIN_CVTSI2SS64,
+ IX86_BUILTIN_CVTSS2SD_ROUND,
IX86_BUILTIN_CVTTPD2DQ512,
IX86_BUILTIN_CVTTPD2UDQ512,
IX86_BUILTIN_CVTTPS2DQ512,
@@ -27980,6 +27984,8 @@ enum ix86_builtins
IX86_BUILTIN_CVTUSI2SS64,
IX86_BUILTIN_DIVPD512,
IX86_BUILTIN_DIVPS512,
+ IX86_BUILTIN_DIVSD_ROUND,
+ IX86_BUILTIN_DIVSS_ROUND,
IX86_BUILTIN_EXPANDPD512,
IX86_BUILTIN_EXPANDPD512Z,
IX86_BUILTIN_EXPANDPDLOAD512,
@@ -28002,8 +28008,12 @@ enum ix86_builtins
IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
IX86_BUILTIN_GETEXPPD512,
IX86_BUILTIN_GETEXPPS512,
+ IX86_BUILTIN_GETEXPSD128,
+ IX86_BUILTIN_GETEXPSS128,
IX86_BUILTIN_GETMANTPD512,
IX86_BUILTIN_GETMANTPS512,
+ IX86_BUILTIN_GETMANTSD128,
+ IX86_BUILTIN_GETMANTSS128,
IX86_BUILTIN_INSERTF32X4,
IX86_BUILTIN_INSERTF64X4,
IX86_BUILTIN_INSERTI32X4,
@@ -28016,8 +28026,12 @@ enum ix86_builtins
IX86_BUILTIN_LOADUPS512,
IX86_BUILTIN_MAXPD512,
IX86_BUILTIN_MAXPS512,
+ IX86_BUILTIN_MAXSD_ROUND,
+ IX86_BUILTIN_MAXSS_ROUND,
IX86_BUILTIN_MINPD512,
IX86_BUILTIN_MINPS512,
+ IX86_BUILTIN_MINSD_ROUND,
+ IX86_BUILTIN_MINSS_ROUND,
IX86_BUILTIN_MOVAPD512,
IX86_BUILTIN_MOVAPS512,
IX86_BUILTIN_MOVDDUP512,
@@ -28034,6 +28048,8 @@ enum ix86_builtins
IX86_BUILTIN_MOVSLDUP512,
IX86_BUILTIN_MULPD512,
IX86_BUILTIN_MULPS512,
+ IX86_BUILTIN_MULSD_ROUND,
+ IX86_BUILTIN_MULSS_ROUND,
IX86_BUILTIN_PABSD512,
IX86_BUILTIN_PABSQ512,
IX86_BUILTIN_PADDD512,
@@ -28144,12 +28160,20 @@ enum ix86_builtins
IX86_BUILTIN_PXORQ512,
IX86_BUILTIN_RCP14PD512,
IX86_BUILTIN_RCP14PS512,
+ IX86_BUILTIN_RCP14SD,
+ IX86_BUILTIN_RCP14SS,
IX86_BUILTIN_RNDSCALEPD,
IX86_BUILTIN_RNDSCALEPS,
+ IX86_BUILTIN_RNDSCALESD,
+ IX86_BUILTIN_RNDSCALESS,
IX86_BUILTIN_RSQRT14PD512,
IX86_BUILTIN_RSQRT14PS512,
+ IX86_BUILTIN_RSQRT14SD,
+ IX86_BUILTIN_RSQRT14SS,
IX86_BUILTIN_SCALEFPD512,
IX86_BUILTIN_SCALEFPS512,
+ IX86_BUILTIN_SCALEFSD,
+ IX86_BUILTIN_SCALEFSS,
IX86_BUILTIN_SHUFPD512,
IX86_BUILTIN_SHUFPS512,
IX86_BUILTIN_SHUF_F32x4,
@@ -28160,6 +28184,8 @@ enum ix86_builtins
IX86_BUILTIN_SQRTPD512_MASK,
IX86_BUILTIN_SQRTPS512_MASK,
IX86_BUILTIN_SQRTPS_NR512,
+ IX86_BUILTIN_SQRTSD_ROUND,
+ IX86_BUILTIN_SQRTSS_ROUND,
IX86_BUILTIN_STOREAPD512,
IX86_BUILTIN_STOREAPS512,
IX86_BUILTIN_STOREDQUDI512,
@@ -28168,6 +28194,8 @@ enum ix86_builtins
IX86_BUILTIN_STOREUPS512,
IX86_BUILTIN_SUBPD512,
IX86_BUILTIN_SUBPS512,
+ IX86_BUILTIN_SUBSD_ROUND,
+ IX86_BUILTIN_SUBSS_ROUND,
IX86_BUILTIN_UCMPD512,
IX86_BUILTIN_UCMPQ512,
IX86_BUILTIN_UNPCKHPD512,
@@ -28196,6 +28224,8 @@ enum ix86_builtins
IX86_BUILTIN_VFMADDPS512_MASK,
IX86_BUILTIN_VFMADDPS512_MASK3,
IX86_BUILTIN_VFMADDPS512_MASKZ,
+ IX86_BUILTIN_VFMADDSD3_ROUND,
+ IX86_BUILTIN_VFMADDSS3_ROUND,
IX86_BUILTIN_VFMADDSUBPD512_MASK,
IX86_BUILTIN_VFMADDSUBPD512_MASK3,
IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
@@ -28206,6 +28236,8 @@ enum ix86_builtins
IX86_BUILTIN_VFMSUBADDPS512_MASK3,
IX86_BUILTIN_VFMSUBPD512_MASK3,
IX86_BUILTIN_VFMSUBPS512_MASK3,
+ IX86_BUILTIN_VFMSUBSD3_MASK3,
+ IX86_BUILTIN_VFMSUBSS3_MASK3,
IX86_BUILTIN_VFNMADDPD512_MASK,
IX86_BUILTIN_VFNMADDPS512_MASK,
IX86_BUILTIN_VFNMSUBPD512_MASK,
@@ -29885,8 +29917,12 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
@@ -29966,6 +30002,8 @@ static const struct builtin_description bdesc_round_args[] =
/* AVX512F */
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
@@ -29980,9 +30018,11 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
{ OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
@@ -29993,6 +30033,8 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
@@ -30003,22 +30045,40 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
@@ -30041,6 +30101,8 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
@@ -34070,6 +34132,10 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case V4SF_FTYPE_V4SF_INT_INT:
case V4SF_FTYPE_V4SF_INT64_INT:
case V2DF_FTYPE_V2DF_INT64_INT:
+ case V4SF_FTYPE_V4SF_V4SF_INT:
+ case V2DF_FTYPE_V2DF_V2DF_INT:
+ case V4SF_FTYPE_V4SF_V2DF_INT:
+ case V2DF_FTYPE_V2DF_V4SF_INT:
nargs = 3;
break;
case V8SF_FTYPE_V8DF_V8SF_QI_INT:
@@ -34080,6 +34146,13 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case V16SI_FTYPE_V16SF_V16SI_HI_INT:
case V8DF_FTYPE_V8SF_V8DF_QI_INT:
case V16SF_FTYPE_V16HI_V16SF_HI_INT:
+ case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
+ case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
+ nargs = 4;
+ break;
+ case V4SF_FTYPE_V4SF_V4SF_INT_INT:
+ case V2DF_FTYPE_V2DF_V2DF_INT_INT:
+ nargs_constant = 2;
nargs = 4;
break;
case INT_FTYPE_V4SF_V4SF_INT_INT:
@@ -34143,6 +34216,8 @@ ix86_expand_round_builtin (const struct builtin_description *d,
{
case CODE_FOR_avx512f_getmantv8df_mask_round:
case CODE_FOR_avx512f_getmantv16sf_mask_round:
+ case CODE_FOR_avx512f_getmantv2df_round:
+ case CODE_FOR_avx512f_getmantv4sf_round:
error ("the immediate argument must be a 4-bit immediate");
return const0_rtx;
case CODE_FOR_avx512f_cmpv8df3_mask_round:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5005a47..d75edb7 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1307,21 +1307,21 @@
(set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vm<plusminus_insn><mode>3"
+(define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(plusminus:VF_128
(match_operand:VF_128 1 "register_operand" "0,v")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"@
<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
- v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+ v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<round_prefix>")
(set_attr "mode" "<ssescalarmode>")])
(define_expand "mul<mode>3<mask_name><round_name>"
@@ -1347,21 +1347,21 @@
(set_attr "btver2_decode" "direct,double")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vm<multdiv_mnemonic><mode>3"
+(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(multdiv:VF_128
(match_operand:VF_128 1 "register_operand" "0,v")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"@
<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
- v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+ v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse<multdiv_mnemonic>")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<round_prefix>")
(set_attr "btver2_decode" "direct,double")
(set_attr "mode" "<ssescalarmode>")])
@@ -1447,7 +1447,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "*srcp14<mode>"
+(define_insn "srcp14<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -1457,7 +1457,7 @@
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ ;; Both assembler dialects must name the destination: %0 is the last
+ ;; operand in the AT&T alternative and the first in the Intel one.
+ "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -1494,21 +1494,21 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vmsqrt<mode>2"
+(define_insn "<sse>_vmsqrt<mode>2<round_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(sqrt:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "xm,vm"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
(match_operand:VF_128 2 "register_operand" "0,v")
(const_int 1)))]
"TARGET_SSE"
"@
sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
- vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
+ vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
(set_attr "atom_sse_attr" "sqrt")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<round_prefix>")
(set_attr "btver2_sse_attr" "sqrt")
(set_attr "mode" "<ssescalarmode>")])
@@ -1543,7 +1543,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "*rsqrt14<mode>"
+(define_insn "rsqrt14<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -1624,22 +1624,22 @@
(set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vm<code><mode>3"
+(define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(smaxmin:VF_128
(match_operand:VF_128 1 "register_operand" "0,v")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"@
<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
- v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+ v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
(set_attr "btver2_sse_attr" "maxmin")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<round_saeonly_prefix>")
(set_attr "mode" "<ssescalarmode>")])
;; These versions of the min/max patterns implement exactly the operations
@@ -4108,34 +4108,34 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "sse2_cvtsd2ss"
+(define_insn "sse2_cvtsd2ss<round_name>"
[(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
(vec_merge:V4SF
(vec_duplicate:V4SF
(float_truncate:V2SF
- (match_operand:V2DF 2 "nonimmediate_operand" "x,m,vm")))
+ (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
(match_operand:V4SF 1 "register_operand" "0,0,v")
(const_int 1)))]
"TARGET_SSE2"
"@
cvtsd2ss\t{%2, %0|%0, %2}
cvtsd2ss\t{%2, %0|%0, %q2}
- vcvtsd2ss\t{%2, %1, %0|%0, %1, %q2}"
+ vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
(set_attr "athlon_decode" "vector,double,*")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "bdver1_decode" "direct,direct,*")
(set_attr "btver2_decode" "double,double,double")
- (set_attr "prefix" "orig,orig,vex")
+ (set_attr "prefix" "orig,orig,<round_prefix>")
(set_attr "mode" "SF")])
-(define_insn "sse2_cvtss2sd"
+(define_insn "sse2_cvtss2sd<round_saeonly_name>"
[(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
(vec_merge:V2DF
(float_extend:V2DF
(vec_select:V2SF
- (match_operand:V4SF 2 "nonimmediate_operand" "x,m,vm")
+ (match_operand:V4SF 2 "nonimmediate_operand" "x,m,<round_saeonly_constraint>")
(parallel [(const_int 0) (const_int 1)])))
(match_operand:V2DF 1 "register_operand" "0,0,v")
(const_int 1)))]
@@ -4143,14 +4143,14 @@
"@
cvtss2sd\t{%2, %0|%0, %2}
cvtss2sd\t{%2, %0|%0, %k2}
- vcvtss2sd\t{%2, %1, %0|%0, %1, %k2}"
+ vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "athlon_decode" "direct,direct,*")
(set_attr "bdver1_decode" "direct,direct,*")
(set_attr "btver2_decode" "double,double,double")
- (set_attr "prefix" "orig,orig,vex")
+ (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
(set_attr "mode" "DF")])
(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
@@ -6553,17 +6553,17 @@
operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
})
-(define_insn "*avx512f_vmscalef<mode>"
+(define_insn "avx512f_vmscalef<mode><round_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
[(match_operand:VF_128 1 "register_operand" "v")
- (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_SCALEF)
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "%vscalef<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "%vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
@@ -6633,17 +6633,17 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_sgetexp<mode>"
+(define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
[(match_operand:VF_128 1 "register_operand" "v")
- (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_saeonly_constraint>")]
UNSPEC_GETEXP)
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "vgetexp<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+ "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
@@ -6798,18 +6798,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "*avx512f_rndscale<mode>"
+(define_insn "avx512f_rndscale<mode><round_saeonly_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
[(match_operand:VF_128 1 "register_operand" "v")
- (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_ROUND)
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
[(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -15184,18 +15184,18 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_getmant<mode>"
+(define_insn "avx512f_getmant<mode><round_saeonly_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
[(match_operand:VF_128 1 "register_operand" "v")
- (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 3 "const_0_to_15_operand")]
UNSPEC_GETMANT)
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "vgetmant<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 4a6d477..487b749 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -51,7 +51,7 @@
(define_subst_attr "mask_operand18" "mask" "" "%{%19%}%N18")
(define_subst_attr "mask_operand19" "mask" "" "%{%20%}%N19")
(define_subst_attr "mask_codefor" "mask" "*" "")
-(define_subst_attr "mask_mode512bit_condition" "mask" "1" "(GET_MODE_SIZE (GET_MODE (operands[0])) == 64)")
+(define_subst_attr "mask_mode512bit_condition" "mask" "1" "(GET_MODE_SIZE (<MODE>mode) == 64)")
(define_subst_attr "store_mask_constraint" "mask" "vm" "v")
(define_subst_attr "store_mask_predicate" "mask" "nonimmediate_operand" "register_operand")
(define_subst_attr "mask_prefix" "mask" "vex" "evex")
@@ -85,7 +85,7 @@
(define_subst_attr "sd_mask_op4" "sd" "" "%{%5%}%N4")
(define_subst_attr "sd_mask_op5" "sd" "" "%{%6%}%N5")
(define_subst_attr "sd_mask_codefor" "sd" "*" "")
-(define_subst_attr "sd_mask_mode512bit_condition" "sd" "1" "(GET_MODE_SIZE (GET_MODE (operands[0])) == 64)")
+(define_subst_attr "sd_mask_mode512bit_condition" "sd" "1" "(GET_MODE_SIZE (<MODE>mode) == 64)")
(define_subst "sd"
[(set (match_operand:SUBST_V 0)
@@ -101,7 +101,6 @@
(define_subst_attr "round_name" "round" "" "_round")
(define_subst_attr "round_mask_operand2" "mask" "%R2" "%R4")
(define_subst_attr "round_mask_operand3" "mask" "%R3" "%R5")
-(define_subst_attr "round_mask_scalar_operand3" "mask_scalar" "%R3" "%R5")
(define_subst_attr "round_sd_mask_operand4" "sd" "%R4" "%R6")
(define_subst_attr "round_op2" "round" "" "%R2")
(define_subst_attr "round_op3" "round" "" "%R3")
@@ -116,8 +115,9 @@
(define_subst_attr "round_constraint2" "round" "m" "v")
(define_subst_attr "round_constraint3" "round" "rm" "r")
(define_subst_attr "round_nimm_predicate" "round" "nonimmediate_operand" "register_operand")
-(define_subst_attr "round_mode512bit_condition" "round" "1" "(GET_MODE (operands[0]) == V16SFmode || GET_MODE (operands[0]) == V8DFmode)")
-(define_subst_attr "round_modev4sf_condition" "round" "1" "(GET_MODE (operands[0]) == V4SFmode)")
+(define_subst_attr "round_prefix" "round" "vex" "evex")
+(define_subst_attr "round_mode512bit_condition" "round" "1" "(<MODE>mode == V16SFmode || <MODE>mode == V8DFmode)")
+(define_subst_attr "round_modev4sf_condition" "round" "1" "(<MODE>mode == V4SFmode)")
(define_subst_attr "round_codefor" "round" "*" "")
(define_subst_attr "round_opnum" "round" "5" "6")
@@ -138,9 +138,11 @@
(define_subst_attr "round_saeonly_mask_scalar_merge_operand4" "mask_scalar_merge" "%R4" "%R5")
(define_subst_attr "round_saeonly_sd_mask_operand5" "sd" "%R5" "%R7")
(define_subst_attr "round_saeonly_op2" "round_saeonly" "" "%R2")
+(define_subst_attr "round_saeonly_op3" "round_saeonly" "" "%R3")
(define_subst_attr "round_saeonly_op4" "round_saeonly" "" "%R4")
(define_subst_attr "round_saeonly_op5" "round_saeonly" "" "%R5")
(define_subst_attr "round_saeonly_op6" "round_saeonly" "" "%R6")
+(define_subst_attr "round_saeonly_prefix" "round_saeonly" "vex" "evex")
(define_subst_attr "round_saeonly_mask_op2" "round_saeonly" "" "<round_saeonly_mask_operand2>")
(define_subst_attr "round_saeonly_mask_op3" "round_saeonly" "" "<round_saeonly_mask_operand3>")
(define_subst_attr "round_saeonly_mask_scalar_op3" "round_saeonly" "" "<round_saeonly_mask_scalar_operand3>")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 1ad8186..74c8179 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -13,6 +13,71 @@
Kirill Yukhin <kirill.yukhin@intel.com>
Michael Zolotukhin <michael.v.zolotukhin@intel.com>
+ * gcc.target/i386/avx-1.c: Update for AVX-512 scalar insns.
+ * gcc.target/i386/avx512f-vaddsd-1.c: New.
+ * gcc.target/i386/avx512f-vaddss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vcvtsd2ss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vcvtss2sd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vdivsd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vdivss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vextractf32x4-2.c: Ditto.
+ * gcc.target/i386/avx512f-vextracti32x4-2.c: Ditto.
+ * gcc.target/i386/avx512f-vfmaddXXXsd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vfmaddXXXss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vfmsubXXXsd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vfmsubXXXss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vfnmaddXXXsd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vfnmaddXXXss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vfnmsubXXXsd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vfnmsubXXXss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vgetexpsd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vgetexpsd-2.c: Ditto.
+ * gcc.target/i386/avx512f-vgetexpss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vgetexpss-2.c: Ditto.
+ * gcc.target/i386/avx512f-vgetmantsd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vgetmantsd-2.c: Ditto.
+ * gcc.target/i386/avx512f-vgetmantss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vgetmantss-2.c: Ditto.
+ * gcc.target/i386/avx512f-vmaxsd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vmaxss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vminsd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vminss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vmulsd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vmulss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vrcp14sd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vrcp14sd-2.c: Ditto.
+ * gcc.target/i386/avx512f-vrcp14ss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto.
+ * gcc.target/i386/avx512f-vrndscalesd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vrndscalesd-2.c: Ditto.
+ * gcc.target/i386/avx512f-vrndscaless-1.c: Ditto.
+ * gcc.target/i386/avx512f-vrndscaless-2.c: Ditto.
+ * gcc.target/i386/avx512f-vrsqrt14sd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vrsqrt14sd-2.c: Ditto.
+ * gcc.target/i386/avx512f-vrsqrt14ss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vrsqrt14ss-2.c: Ditto.
+ * gcc.target/i386/avx512f-vscalefsd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vscalefsd-2.c: Ditto.
+ * gcc.target/i386/avx512f-vscalefss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vscalefss-2.c: Ditto.
+ * gcc.target/i386/avx512f-vsqrtsd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vsqrtss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vsubsd-1.c: Ditto.
+ * gcc.target/i386/avx512f-vsubss-1.c: Ditto.
+ * gcc.target/i386/sse-14.c: Update for AVX-512 scalar insns.
+ * gcc.target/i386/sse-23.c: Ditto.
+ * gcc.target/i386/testimm-10.c: Ditto.
+
+2013-12-31 Alexander Ivchenko <alexander.ivchenko@intel.com>
+ Maxim Kuznetsov <maxim.kuznetsov@intel.com>
+ Sergey Lega <sergey.s.lega@intel.com>
+ Anna Tikhonova <anna.tikhonova@intel.com>
+ Ilya Tocar <ilya.tocar@intel.com>
+ Andrey Turetskiy <andrey.turetskiy@intel.com>
+ Ilya Verbin <ilya.verbin@intel.com>
+ Kirill Yukhin <kirill.yukhin@intel.com>
+ Michael Zolotukhin <michael.v.zolotukhin@intel.com>
+
* gcc.target/i386/avx-1.c: Add define for __builtin_ia32_sha1rnds4.
* gcc.target/i386/i386.exp (check_effective_target_sha): New.
* gcc.target/i386/sha-check.h: New file.
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index 0d38f30..7201592 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -169,6 +169,8 @@
/* avx512fintrin.h */
#define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 1)
+#define __builtin_ia32_addss_round(A, B, C) __builtin_ia32_addss_round(A, B, 1)
#define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E)
#define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E)
#define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D)
@@ -184,11 +186,11 @@
#define __builtin_ia32_cvtps2dq512_mask(A, B, C, D) __builtin_ia32_cvtps2dq512_mask(A, B, C, 1)
#define __builtin_ia32_cvtps2pd512_mask(A, B, C, D) __builtin_ia32_cvtps2pd512_mask(A, B, C, 5)
#define __builtin_ia32_cvtps2udq512_mask(A, B, C, D) __builtin_ia32_cvtps2udq512_mask(A, B, C, 1)
-#define __builtin_ia32_cvtsd2ss_mask(A, B, C, D, E) __builtin_ia32_cvtsd2ss_mask(A, B, C, D, 1)
+#define __builtin_ia32_cvtsd2ss_round(A, B, C) __builtin_ia32_cvtsd2ss_round(A, B, 1)
+#define __builtin_ia32_cvtss2sd_round(A, B, C) __builtin_ia32_cvtss2sd_round(A, B, 4)
#define __builtin_ia32_cvtsi2sd64(A, B, C) __builtin_ia32_cvtsi2sd64(A, B, 1)
#define __builtin_ia32_cvtsi2ss32(A, B, C) __builtin_ia32_cvtsi2ss32(A, B, 1)
#define __builtin_ia32_cvtsi2ss64(A, B, C) __builtin_ia32_cvtsi2ss64(A, B, 1)
-#define __builtin_ia32_cvtss2sd_mask(A, B, C, D, E) __builtin_ia32_cvtss2sd_mask(A, B, C, D, 5)
#define __builtin_ia32_cvttpd2dq512_mask(A, B, C, D) __builtin_ia32_cvttpd2dq512_mask(A, B, C, 5)
#define __builtin_ia32_cvttpd2udq512_mask(A, B, C, D) __builtin_ia32_cvttpd2udq512_mask(A, B, C, 5)
#define __builtin_ia32_cvttps2dq512_mask(A, B, C, D) __builtin_ia32_cvttps2dq512_mask(A, B, C, 5)
@@ -199,6 +201,8 @@
#define __builtin_ia32_cvtusi2ss64(A, B, C) __builtin_ia32_cvtusi2ss64(A, B, 1)
#define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_divsd_round(A, B, C) __builtin_ia32_divsd_round(A, B, 1)
+#define __builtin_ia32_divss_round(A, B, C) __builtin_ia32_divss_round(A, B, 1)
#define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D)
#define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D)
#define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D)
@@ -221,18 +225,28 @@
#define __builtin_ia32_gathersiv8di(A, B, C, D, F) __builtin_ia32_gathersiv8di(A, B, C, D, 1)
#define __builtin_ia32_getexppd512_mask(A, B, C, D) __builtin_ia32_getexppd512_mask(A, B, C, 5)
#define __builtin_ia32_getexpps512_mask(A, B, C, D) __builtin_ia32_getexpps512_mask(A, B, C, 5)
+#define __builtin_ia32_getexpsd128_round(A, B, C) __builtin_ia32_getexpsd128_round(A, B, 4)
+#define __builtin_ia32_getexpss128_round(A, B, C) __builtin_ia32_getexpss128_round(A, B, 4)
#define __builtin_ia32_getmantpd512_mask(A, F, C, D, E) __builtin_ia32_getmantpd512_mask(A, 1, C, D, 5)
#define __builtin_ia32_getmantps512_mask(A, F, C, D, E) __builtin_ia32_getmantps512_mask(A, 1, C, D, 5)
+#define __builtin_ia32_getmantsd_round(A, B, C, D) __builtin_ia32_getmantsd_round(A, B, 1, 4)
+#define __builtin_ia32_getmantss_round(A, B, C, D) __builtin_ia32_getmantss_round(A, B, 1, 4)
#define __builtin_ia32_insertf32x4_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_mask(A, B, 1, D, E)
#define __builtin_ia32_insertf64x4_mask(A, B, F, D, E) __builtin_ia32_insertf64x4_mask(A, B, 1, D, E)
#define __builtin_ia32_inserti32x4_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_mask(A, B, 1, D, E)
#define __builtin_ia32_inserti64x4_mask(A, B, F, D, E) __builtin_ia32_inserti64x4_mask(A, B, 1, D, E)
#define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 5)
#define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 5)
+#define __builtin_ia32_maxsd_round(A, B, C) __builtin_ia32_maxsd_round(A, B, 4)
+#define __builtin_ia32_maxss_round(A, B, C) __builtin_ia32_maxss_round(A, B, 4)
#define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 5)
#define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 5)
+#define __builtin_ia32_minsd_round(A, B, C) __builtin_ia32_minsd_round(A, B, 4)
+#define __builtin_ia32_minss_round(A, B, C) __builtin_ia32_minss_round(A, B, 4)
#define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_mulsd_round(A, B, C) __builtin_ia32_mulsd_round(A, B, 1)
+#define __builtin_ia32_mulss_round(A, B, C) __builtin_ia32_mulss_round(A, B, 1)
#define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D)
#define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D)
#define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D)
@@ -252,10 +266,12 @@
#define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E)
#define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 5)
#define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 5)
-#define __builtin_ia32_rndscalesd_mask(A, B, I, D, E, F) __builtin_ia32_rndscalesd_mask(A, B, 1, D, E, 5)
-#define __builtin_ia32_rndscaless_mask(A, B, I, D, E, F) __builtin_ia32_rndscaless_mask(A, B, 1, D, E, 5)
+#define __builtin_ia32_rndscalesd_round(A, B, C, D) __builtin_ia32_rndscalesd_round(A, B, 1, 4)
+#define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4)
#define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_scalefsd_round(A, B, C) __builtin_ia32_scalefsd_round(A, B, 1)
+#define __builtin_ia32_scalefss_round(A, B, C) __builtin_ia32_scalefss_round(A, B, 1)
#define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 1)
#define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 1)
#define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 1)
@@ -272,10 +288,12 @@
#define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E)
#define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 1)
#define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 1)
-#define __builtin_ia32_sqrtsd_mask(A, B, C, D, E) __builtin_ia32_sqrtsd_mask(A, B, C, D, 1)
-#define __builtin_ia32_sqrtss_mask(A, B, C, D, E) __builtin_ia32_sqrtss_mask(A, B, C, D, 1)
+#define __builtin_ia32_sqrtss_round(A, B, C) __builtin_ia32_sqrtss_round(A, B, 1)
+#define __builtin_ia32_sqrtsd_round(A, B, C) __builtin_ia32_sqrtsd_round(A, B, 1)
#define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_subsd_round(A, B, C) __builtin_ia32_subsd_round(A, B, 1)
+#define __builtin_ia32_subss_round(A, B, C) __builtin_ia32_subss_round(A, B, 1)
#define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D)
#define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D)
#define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 5)
@@ -304,12 +322,8 @@
#define __builtin_ia32_vfmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask(A, B, C, D, 1)
#define __builtin_ia32_vfmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddps512_maskz(A, B, C, D, 1)
-#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 1)
-#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 1)
-#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 1)
-#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 1)
-#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 1)
-#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsd3_round(A, B, C, D) __builtin_ia32_vfmaddsd3_round(A, B, C, 1)
+#define __builtin_ia32_vfmaddss3_round(A, B, C, D) __builtin_ia32_vfmaddss3_round(A, B, C, 1)
#define __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, 1)
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-1.c
new file mode 100644
index 0000000..f0bc5ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vaddsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_add_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddss-1.c
new file mode 100644
index 0000000..5a8491c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vaddss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_add_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2ss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2ss-1.c
new file mode 100644
index 0000000..8cb51c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2ss-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtsd2ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 s1, r;
+volatile __m128d s2;
+
+void extern
+avx512f_test (void)
+{
+ r = _mm_cvt_roundsd_ss (s1, s2, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2sd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2sd-1.c
new file mode 100644
index 0000000..5b6a43f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2sd-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtss2sd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d s1, r;
+volatile __m128 s2;
+
+void extern
+avx512f_test (void)
+{
+ r = _mm_cvt_roundss_sd (s1, s2, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-1.c
new file mode 100644
index 0000000..95df56c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vdivsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_div_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivss-1.c
new file mode 100644
index 0000000..5c6eb94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vdivss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_div_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vextractf32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vextractf32x4-2.c
new file mode 100644
index 0000000..35377b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vextractf32x4-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#define AVX512F
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void
+CALC (UNION_TYPE (AVX512F_LEN,) s1, float *res_ref, int mask)
+{
+ memset (res_ref, 0, 16);
+ memcpy (res_ref, s1.a + mask * 4, 16);
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) s1;
+ union128 res1, res2, res3;
+ float res_ref[4];
+ MASK_TYPE mask = MASK_VALUE;
+ int j;
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = j * j / 4.56;
+ }
+
+ for (j = 0; j < 4; j++)
+ {
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_extractf32x4_ps) (s1.x, 1);
+ res2.x = INTRINSIC (_mask_extractf32x4_ps) (res2.x, mask, s1.x, 1);
+ res3.x = INTRINSIC (_maskz_extractf32x4_ps) (mask, s1.x, 1);
+ CALC (s1, res_ref, 1);
+
+ if (check_union128 (res1, res_ref))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, 4);
+ if (check_union128 (res2, res_ref))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, 4);
+ if (check_union128 (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vextracti32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vextracti32x4-2.c
new file mode 100644
index 0000000..1ea77b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vextracti32x4-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#define AVX512F
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void
+CALC (UNION_TYPE (AVX512F_LEN, i_d) s1, int *res_ref, int mask)
+{
+ memset (res_ref, 0, 16);
+ memcpy (res_ref, s1.a + mask * 4, 16);
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1;
+ union128i_d res1, res2, res3;
+ int res_ref[4];
+ MASK_TYPE mask = MASK_VALUE;
+ int j;
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = j * j / 4.56;
+ }
+
+ for (j = 0; j < 4; j++)
+ {
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_extracti32x4_epi32) (s1.x, 1);
+ res2.x =
+ INTRINSIC (_mask_extracti32x4_epi32) (res2.x, mask, s1.x, 1);
+ res3.x = INTRINSIC (_maskz_extracti32x4_epi32) (mask, s1.x, 1);
+ CALC (s1, res_ref, 1);
+
+ if (check_union128i_d (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, 4);
+ if (check_union128i_d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, 4);
+ if (check_union128i_d (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c
new file mode 100644
index 0000000..ea8b17c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d a, b, c;
+
+void extern
+avx512f_test (void)
+{
+ a = _mm_fmadd_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c
new file mode 100644
index 0000000..cd44fb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 a, b, c;
+
+void extern
+avx512f_test (void)
+{
+ a = _mm_fmadd_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c
new file mode 100644
index 0000000..2d78df6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d a, b, c;
+
+void extern
+avx512f_test (void)
+{
+ a = _mm_fmsub_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c
new file mode 100644
index 0000000..b7609f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 a, b, c;
+
+extern void
+avx512f_test (void) /* Expect one vfmsub...ss using {rn-sae}.  */
+{
+  a = _mm_fmsub_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c
new file mode 100644
index 0000000..e938236
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d a, b, c;
+
+extern void
+avx512f_test (void) /* Expect one vfnmadd...sd using {rn-sae}.  */
+{
+  a = _mm_fnmadd_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c
new file mode 100644
index 0000000..f5752e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 a, b, c;
+
+extern void
+avx512f_test (void) /* Expect one vfnmadd...ss using {rn-sae}.  */
+{
+  a = _mm_fnmadd_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c
new file mode 100644
index 0000000..931b5d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d a, b, c;
+
+extern void
+avx512f_test (void) /* Expect one vfnmsub...sd using {rn-sae}.  */
+{
+  a = _mm_fnmsub_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c
new file mode 100644
index 0000000..f097f1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 a, b, c;
+
+extern void
+avx512f_test (void) /* Expect one vfnmsub...ss using {rn-sae}.  */
+{
+  a = _mm_fnmsub_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-1.c
new file mode 100644
index 0000000..952ed54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vgetexpsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetexpsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+
+extern void
+avx512f_test (void) /* Expect vgetexpsd twice, once with {sae}.  */
+{
+  x = _mm_getexp_sd (x, x);
+  x = _mm_getexp_round_sd (x, x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-2.c
new file mode 100644
index 0000000..c1e5e5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-2.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 64)
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vgetexpsd (double *s, double *r) /* *r = floor of base-2 log of *s.  */
+{
+  *r = floor (log (*s) / log (2.0));
+}
+
+static void
+avx512f_test (void)
+{
+  union128d res1, s1;
+  double res_ref[SIZE];
+  int k = 0;
+
+  /* The reference starts as a copy of the source elements.  */
+  while (k < SIZE)
+    {
+      s1.a[k] = 5.0 - k;
+      res_ref[k] = s1.a[k];
+      k++;
+    }
+  res1.x = _mm_getexp_sd (s1.x, s1.x);
+  /* Only element 0 of the reference is recomputed.  */
+  compute_vgetexpsd (s1.a, res_ref);
+  if (check_fp_union128d (res1, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-1.c
new file mode 100644
index 0000000..d946a47
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vgetexpss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetexpss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+
+extern void
+avx512f_test (void) /* Expect vgetexpss twice, once with {sae}.  */
+{
+  x = _mm_getexp_ss (x, x);
+  x = _mm_getexp_round_ss (x, x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-2.c
new file mode 100644
index 0000000..39d77c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-2.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 32)
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vgetexpss (float *s, float *r) /* *r = floor of base-2 log of *s.  */
+{
+  *r = floor (log (*s) / log (2.0));
+}
+
+static void
+avx512f_test (void)
+{
+  union128 res1, s1;
+  float res_ref[SIZE];
+  int k = 0;
+
+  /* The reference starts as a copy of the source elements.  */
+  while (k < SIZE)
+    {
+      s1.a[k] = 5.0 - k;
+      res_ref[k] = s1.a[k];
+      k++;
+    }
+  res1.x = _mm_getexp_ss (s1.x, s1.x);
+  /* Only element 0 of the reference is recomputed.  */
+  compute_vgetexpss (s1.a, res_ref);
+  if (check_fp_union128 (res1, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-1.c
new file mode 100644
index 0000000..4b252a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vgetmantsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x, y, z;
+
+extern void
+avx512f_test (void) /* Expect vgetmantsd twice, once with {sae}.  */
+{
+  x = _mm_getmant_sd (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
+  x = _mm_getmant_round_sd (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src,
+                            _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-2.c
new file mode 100644
index 0000000..50d98a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-2.c
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+#include <math.h>
+
+union fp_int_t
+{
+  unsigned long long int_val;
+  double fp_val;
+};
+
+/* Reference VGETMANTSD mantissa of SOURCE.  SIGNCTRL bit 0 forces a positive
+   sign, bit 1 yields NaN for negative inputs; INTERV picks a case below.  */
+double
+get_norm_mant (double source, int signctrl, int interv)
+{
+  unsigned long long src, sign, exp, fraction;
+  union fp_int_t bin_conv;
+
+  bin_conv.fp_val = source;
+  src = bin_conv.int_val;
+  sign = (signctrl & 0x1) ? 0 : (src >> 63);
+  exp = (src & 0x7ff0000000000000) >> 52;
+  fraction = (src & 0xfffffffffffff);
+  if (isnan (source))
+    return signbit (source) ? -NAN : NAN;
+  if (source == 0.0 || source == -0.0 || isinf (source))
+    return sign ? -1.0 : 1.0;
+  if (signbit (source) && (signctrl & 0x2))
+    return -NAN;
+  if (!isnormal (source))
+    {
+      src = (src & 0xfff7ffffffffffff);
+      exp = 0x3ff;
+      while (!(src & 0x8000000000000))
+        {
+          src += fraction & 0x8000000000000;
+          fraction = fraction << 1;
+          exp--;
+        }
+    }
+  /* INTERV chooses the biased exponent written into the result.  */
+  switch (interv)
+    {
+    case 0:
+      exp = 0x3ff;
+      break;
+    case 1:
+      exp = ((exp - 0x3ff) & 0x1) ? 0x3fe : 0x3ff;
+      break;
+    case 2:
+      exp = 0x3fe;
+      break;
+    case 3:
+      exp = (fraction & 0x8000000000000) ? 0x3fe : 0x3ff;
+      break;
+    default:
+      abort ();
+    }
+  fraction &= 0xfffffffffffff; /* Drop a renormalized denormal's bit 52.  */
+  bin_conv.int_val = (sign << 63) | (exp << 52) | fraction;
+  return bin_conv.fp_val;
+}
+
+static void
+compute_vgetmantsd (double *r, double *s1, double *s2, int interv,
+                    int signctrl)
+{
+  *r = get_norm_mant (*s2, signctrl, interv); /* Low: mantissa of s2[0].  */
+  r[1] = s1[1];                               /* High: copied from s1.  */
+}
+
+static void
+avx512f_test (void)
+{
+  union128d res1, src1, src2;
+  double res_ref[2];
+  int interv = _MM_MANT_NORM_p5_1;
+  int signctrl = _MM_MANT_SIGN_src;
+
+  src1.x = _mm_set_pd (-3.0, 111.111);
+  src2.x = _mm_set_pd (222.222, -2.0);
+
+  res1.x = _mm_getmant_sd (src1.x, src2.x, interv, signctrl);
+
+  /* Reference: mantissa of src2[0] in element 0, src1[1] in element 1.  */
+  compute_vgetmantsd (res_ref, src1.a, src2.a, interv, signctrl);
+
+  if (check_union128d (res1, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-1.c
new file mode 100644
index 0000000..30c837b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vgetmantss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x, y, z;
+
+extern void
+avx512f_test (void) /* Expect vgetmantss twice, once with {sae}.  */
+{
+  x = _mm_getmant_ss (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
+  x = _mm_getmant_round_ss (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src,
+                            _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-2.c
new file mode 100644
index 0000000..291c0df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-2.c
@@ -0,0 +1,99 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+#include <math.h>
+
+union fp_int_t
+{
+  unsigned int int_val;
+  float fp_val;
+};
+
+/* Reference VGETMANTSS mantissa of SOURCE.  SIGNCTRL bit 0 forces a positive
+   sign, bit 1 yields NaN for negative inputs; INTERV picks a case below.  */
+float
+get_norm_mant (float source, int signctrl, int interv)
+{
+  unsigned int src, sign, exp, fraction;
+  union fp_int_t bin_conv;
+
+  bin_conv.fp_val = source;
+  src = bin_conv.int_val;
+  sign = (signctrl & 0x1) ? 0 : (src >> 31);
+  exp = (src & 0x7f800000) >> 23;
+  fraction = (src & 0x7fffff);
+
+  if (isnan (source))
+    return signbit (source) ? -NAN : NAN;
+  if (source == 0.0 || source == -0.0 || isinf (source))
+    return sign ? -1.0 : 1.0;
+  if (signbit (source) && (signctrl & 0x2))
+    return -NAN;
+  if (!isnormal (source))
+    {
+      src = (src & 0xffbfffff);
+      exp = 0x7f;
+      while (!(src & 0x400000))
+        {
+          src += fraction & 0x400000;
+          fraction = fraction << 1;
+          exp--;
+        }
+    }
+  /* INTERV chooses the biased exponent written into the result.  */
+  switch (interv)
+    {
+    case 0:
+      exp = 0x7f;
+      break;
+    case 1:
+      exp = ((exp - 0x7f) & 0x1) ? 0x7e : 0x7f;
+      break;
+    case 2:
+      exp = 0x7e;
+      break;
+    case 3:
+      exp = (fraction & 0x400000) ? 0x7e : 0x7f;
+      break;
+    default:
+      abort ();
+    }
+  fraction &= 0x7fffff; /* Drop a renormalized denormal's bit 23.  */
+  bin_conv.int_val = (sign << 31) | (exp << 23) | fraction;
+  return bin_conv.fp_val;
+}
+
+static void
+compute_vgetmantss (float *r, float *s1, float *s2, int interv,
+                    int signctrl)
+{
+  /* Element 0 is the normalized mantissa of s2[0]; elements 1-3 are
+     copied unchanged from s1.  */
+  r[0] = get_norm_mant (s2[0], signctrl, interv);
+  r[1] = s1[1];
+  r[2] = s1[2];
+  r[3] = s1[3];
+}
+
+static void
+avx512f_test (void)
+{
+  union128 res1, src1, src2;
+  float res_ref[4];
+  int interv = _MM_MANT_NORM_p5_1;
+  int signctrl = _MM_MANT_SIGN_src;
+
+  src1.x = _mm_set_ps (-24.043, 68.346, -43.35, 546.46);
+  src2.x = _mm_set_ps (222.222, 333.333, 444.444, -2.0);
+
+  res1.x = _mm_getmant_ss (src1.x, src2.x, interv, signctrl);
+
+  /* Reference: mantissa of src2[0] in element 0, src1[1..3] elsewhere.  */
+  compute_vgetmantss (res_ref, src1.a, src2.a, interv, signctrl);
+
+  if (check_union128 (res1, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c
new file mode 100644
index 0000000..8c24704
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+
+extern void
+avx512f_test (void) /* Expect one vmaxsd using {sae}.  */
+{
+  x1 = _mm_max_round_sd (x1, x2, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c
new file mode 100644
index 0000000..027445d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+
+extern void
+avx512f_test (void) /* Expect one vmaxss using {sae}.  */
+{
+  x1 = _mm_max_round_ss (x1, x2, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c
new file mode 100644
index 0000000..8f8488f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+
+extern void
+avx512f_test (void) /* Expect one vminsd using {sae}.  */
+{
+  x1 = _mm_min_round_sd (x1, x2, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c
new file mode 100644
index 0000000..0774b75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+
+extern void
+avx512f_test (void) /* Expect one vminss using {sae}.  */
+{
+  x1 = _mm_min_round_ss (x1, x2, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-1.c
new file mode 100644
index 0000000..c85832a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmulsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+
+extern void
+avx512f_test (void) /* Expect one vmulsd using {rn-sae}.  */
+{
+  x1 = _mm_mul_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulss-1.c
new file mode 100644
index 0000000..cb4bf0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmulss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+
+extern void
+avx512f_test (void) /* Expect one vmulss using {rn-sae}.  */
+{
+  x1 = _mm_mul_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-1.c
new file mode 100644
index 0000000..c0c8d03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrcp14sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+
+extern void
+avx512f_test (void) /* Expect one vrcp14sd.  */
+{
+  x1 = _mm_rcp14_sd (x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
new file mode 100644
index 0000000..9ff3541
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vrcp14sd (double *s1, double *s2, double *r)
+{
+  *r = 1.0 / *s2; /* Low element: reciprocal of s2[0].  */
+  r[1] = s1[1];   /* High element: copied from s1.  */
+}
+
+static void
+avx512f_test (void)
+{
+  union128d s1, s2, res1;
+  double res_ref[2];
+
+  s1.x = _mm_set_pd (-3.0, 111.111);
+  s2.x = _mm_set_pd (222.222, -2.0);
+
+  /* Reference below: low = 1 / s2[0], upper = s1[1].  */
+  res1.x = _mm_rcp14_sd (s1.x, s2.x);
+
+  compute_vrcp14sd (s1.a, s2.a, res_ref);
+
+  if (check_union128d (res1, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-1.c
new file mode 100644
index 0000000..580dfd6a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrcp14ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+
+extern void
+avx512f_test (void) /* Expect one vrcp14ss.  */
+{
+  x1 = _mm_rcp14_ss (x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
new file mode 100644
index 0000000..fe8989a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vrcp14ss (float *s1, float *s2, float *r)
+{
+  int k;
+  r[0] = 1.0 / s2[0]; /* Low element: reciprocal of s2[0].  */
+  for (k = 1; k < 4; k++)
+    r[k] = s1[k];     /* Elements 1-3 are copied from s1.  */
+}
+
+static void
+avx512f_test (void)
+{
+  union128 s1, s2, res1;
+  float res_ref[4];
+
+  s1.x = _mm_set_ps (-24.043, 68.346, -43.35, 546.46);
+  s2.x = _mm_set_ps (222.222, 333.333, 444.444, -2.0);
+
+  /* Reference below: low = 1 / s2[0], elements 1-3 = s1[1..3].  */
+  res1.x = _mm_rcp14_ss (s1.x, s2.x);
+
+  compute_vrcp14ss (s1.a, s2.a, res_ref);
+
+  if (check_union128 (res1, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-1.c
new file mode 100644
index 0000000..2f370a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+
+extern void
+avx512f_test (void) /* Expect vrndscalesd twice, once with {sae}.  */
+{
+  x1 = _mm_roundscale_sd (x1, x2, 0x42);
+  x1 = _mm_roundscale_round_sd (x1, x2, 0x42, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-2.c
new file mode 100644
index 0000000..5b4e842
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 64)
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+
+static void
+compute_rndscalesd (double *s1, double *s2, double *r, int imm)
+{
+  /* Scalar reference for _mm_roundscale_sd.  The low four bits of IMM
+     pick the rounding mode and IMM >> 4 gives M; element 0 of the
+     result is s2[0] rounded to a multiple of 2^-M.  Only floor and
+     ceil are modelled here, so any other rounding mode aborts the
+     test.  */
+  const int mode = imm & 0xf;
+  const double scale = pow (2, imm >> 4);
+  const double scaled = s2[0] * scale;
+
+  if (mode == _MM_FROUND_FLOOR)
+    r[0] = floor (scaled) / scale;
+  else if (mode == _MM_FROUND_CEIL)
+    r[0] = ceil (scaled) / scale;
+  else
+    abort ();
+
+  /* The upper element is copied from the first source.  */
+  r[1] = s1[1];
+}
+
+static void
+avx512f_test (void)
+{
+  union128d src_a, src_b, got;
+  double want[SIZE];
+  /* Floor mode with M = 7; see compute_rndscalesd for the reference.  */
+  int ctl = _MM_FROUND_FLOOR | (7 << 4);
+
+  src_a.x = _mm_set_pd (4.05084, -1.23162);
+  src_b.x = _mm_set_pd (-3.53222, 7.33527);
+
+  got.x = _mm_roundscale_sd (src_a.x, src_b.x, ctl);
+  compute_rndscalesd (src_a.a, src_b.a, want, ctl);
+
+  if (check_union128d (got, want))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-1.c
new file mode 100644
index 0000000..c9f5a75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+
+extern void
+avx512f_test (void) /* Expect vrndscaless twice, once with {sae}.  */
+{
+  x1 = _mm_roundscale_ss (x1, x2, 0x42);
+  x1 = _mm_roundscale_round_ss (x1, x2, 0x42, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-2.c
new file mode 100644
index 0000000..7acfe4c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 32)
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+
+static void
+compute_rndscaless (float *s1, float *s2, float *r, int imm)
+{
+  /* Scalar reference for _mm_roundscale_ss.  The low four bits of IMM
+     pick the rounding mode and IMM >> 4 gives M; element 0 of the
+     result is s2[0] rounded to a multiple of 2^-M.  Only floor and
+     ceil are modelled here, so any other rounding mode aborts the
+     test.  */
+  const int mode = imm & 0xf;
+  const double scale = pow (2, imm >> 4);
+  int k;
+
+  if (mode == _MM_FROUND_FLOOR)
+    r[0] = floorf (s2[0] * scale) / scale;
+  else if (mode == _MM_FROUND_CEIL)
+    r[0] = ceilf (s2[0] * scale) / scale;
+  else
+    abort ();
+
+  /* Elements 1-3 of the result are copied unchanged from the first
+     source.  */
+  for (k = 1; k < 4; k++)
+    r[k] = s1[k];
+}
+
+static void
+avx512f_test (void)
+{
+  union128 src_a, src_b, got;
+  float want[SIZE];
+  /* Floor mode with M = 7; see compute_rndscaless for the reference.  */
+  int ctl = _MM_FROUND_FLOOR | (7 << 4);
+
+  src_a.x = _mm_set_ps (4.05084, -1.23162, 2.00231, -6.22103);
+  src_b.x = _mm_set_ps (-4.19319, -3.53222, 7.33527, 5.57655);
+
+  got.x = _mm_roundscale_ss (src_a.x, src_b.x, ctl);
+  compute_rndscaless (src_a.a, src_b.a, want, ctl);
+
+  if (check_union128 (got, want))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c
new file mode 100644
index 0000000..bd8b7a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+extern void
+avx512f_test (void) /* Expect one vrsqrt14sd.  */
+{
+  x1 = _mm_rsqrt14_sd (x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c
new file mode 100644
index 0000000..ef4e407
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vrsqrt14sd (double *s1, double *s2, double *r)
+{
+  *r = 1.0 / sqrt (*s2); /* Low element: reciprocal square root of s2[0].  */
+  r[1] = s1[1];          /* High element: copied from s1.  */
+}
+
+static void
+avx512f_test (void)
+{
+  union128d s1, s2, res1;
+  double res_ref[2];
+
+  s1.x = _mm_set_pd (-3.0, 111.111);
+  s2.x = _mm_set_pd (222.222, 4.0);
+
+  /* Reference below: low = 1 / sqrt (s2[0]), upper = s1[1].  */
+  res1.x = _mm_rsqrt14_sd (s1.x, s2.x);
+
+  compute_vrsqrt14sd (s1.a, s2.a, res_ref);
+
+  if (check_fp_union128d (res1, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c
new file mode 100644
index 0000000..d4d4eea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+
+extern void
+avx512f_test (void) /* Expect one vrsqrt14ss.  */
+{
+  x1 = _mm_rsqrt14_ss (x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c
new file mode 100644
index 0000000..b01420f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vrsqrt14ss (float *s1, float *s2, float *r)
+{
+  int k;
+  r[0] = 1.0 / sqrt (s2[0]); /* Low: reciprocal square root of s2[0].  */
+  for (k = 1; k < 4; k++)
+    r[k] = s1[k];            /* Elements 1-3 are copied from s1.  */
+}
+
+static void
+avx512f_test (void)
+{
+  union128 s1, s2, res1;
+  float res_ref[4];
+
+  s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
+  s2.x = _mm_set_ps (222.222, 333.333, 444.444, 4.0);
+
+  /* Reference below: low = 1 / sqrt (s2[0]), elements 1-3 = s1[1..3].  */
+  res1.x = _mm_rsqrt14_ss (s1.x, s2.x);
+
+  compute_vrsqrt14ss (s1.a, s2.a, res_ref);
+
+  if (check_fp_union128 (res1, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c
new file mode 100644
index 0000000..bbf238e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+
+extern void
+avx512f_test (void) /* Expect vscalefsd twice, once with {rn-sae}.  */
+{
+  x = _mm_scalef_sd (x, x);
+  x = _mm_scalef_round_sd (x, x, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c
new file mode 100644
index 0000000..131fc67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+
+static void
+compute_scalefsd (double *s1, double *s2, double *r)
+{
+  *r = *s1 * pow (2, floor (*s2)); /* Low: s1[0] * 2^floor (s2[0]).  */
+  r[1] = s1[1];                    /* High element: copied from s1.  */
+}
+
+static void
+avx512f_test (void)
+{
+  union128d res1, s1, s2;
+  double res_ref[SIZE];
+  int k = 0;
+
+  /* Element k: value 11.5 * (k + 1), scale operand 10.5 * (k + 1).  */
+  while (k < SIZE)
+    {
+      s1.a[k] = 11.5 * (k + 1);
+      s2.a[k] = 10.5 * (k + 1);
+      k++;
+    }
+  res1.x = _mm_scalef_sd (s1.x, s2.x);
+  compute_scalefsd (s1.a, s2.a, res_ref);
+
+  if (check_union128d (res1, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c
new file mode 100644
index 0000000..d36b2ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+
+extern void
+avx512f_test (void) /* Expect vscalefss twice, once with {rn-sae}.  */
+{
+  x = _mm_scalef_ss (x, x);
+  x = _mm_scalef_round_ss (x, x, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c
new file mode 100644
index 0000000..3e8f6d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+
+static void
+compute_scalefss (float *s1, float *s2, float *r)
+{
+  int k;
+  r[0] = s1[0] * (float) pow (2, floor (s2[0])); /* s1[0] * 2^floor (s2[0]).  */
+  for (k = 1; k < 4; k++)
+    r[k] = s1[k]; /* Elements 1-3 are copied from s1.  */
+}
+
+static void
+avx512f_test (void)
+{
+  union128 res1, s1, s2;
+  float res_ref[SIZE];
+  int k = 0;
+
+  /* Element k: value 11.5 * (k + 1), scale operand 10.5 * (k + 1).  */
+  while (k < SIZE)
+    {
+      s1.a[k] = 11.5 * (k + 1);
+      s2.a[k] = 10.5 * (k + 1);
+      k++;
+    }
+  res1.x = _mm_scalef_ss (s1.x, s2.x);
+  compute_scalefss (s1.a, s2.a, res_ref);
+
+  if (check_union128 (res1, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-1.c
new file mode 100644
index 0000000..5814e3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vsqrtsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+
+extern void
+avx512f_test (void) /* Expect one vsqrtsd using {rn-sae}.  */
+{
+  x1 = _mm_sqrt_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-1.c
new file mode 100644
index 0000000..81e8a0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vsqrtss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+
+extern void
+avx512f_test (void) /* Expect one vsqrtss using {rn-sae}.  */
+{
+  x1 = _mm_sqrt_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-1.c
new file mode 100644
index 0000000..511ceb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vsubsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+
+extern void
+avx512f_test (void) /* Expect one vsubsd using {rn-sae}.  */
+{
+  x1 = _mm_sub_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubss-1.c
new file mode 100644
index 0000000..618662f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vsubss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+
+extern void
+avx512f_test (void) /* Expect one vsubss using {rn-sae}.  */
+{
+  x1 = _mm_sub_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index e8cb533..c5d8876 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -199,6 +199,7 @@ test_1x (_mm512_getmant_pd, __m512d, __m512d, 1, 1)
test_1x (_mm512_getmant_ps, __m512, __m512, 1, 1)
test_1x (_mm512_roundscale_round_pd, __m512d, __m512d, 1, 5)
test_1x (_mm512_roundscale_round_ps, __m512, __m512, 1, 5)
+test_1x (_mm_cvt_roundi32_ss, __m128, __m128, 1, 1)
test_2 (_mm512_add_round_pd, __m512d, __m512d, __m512d, 1)
test_2 (_mm512_add_round_ps, __m512, __m512, __m512, 1)
test_2 (_mm512_alignr_epi32, __m512i, __m512i, __m512i, 1)
@@ -278,16 +279,45 @@ test_2 (_mm512_shuffle_pd, __m512d, __m512d, __m512d, 1)
test_2 (_mm512_shuffle_ps, __m512, __m512, __m512, 1)
test_2 (_mm512_sub_round_pd, __m512d, __m512d, __m512d, 1)
test_2 (_mm512_sub_round_ps, __m512, __m512, __m512, 1)
+test_2 (_mm_add_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_add_round_ss, __m128, __m128, __m128, 1)
test_2 (_mm_cmp_sd_mask, __mmask8, __m128d, __m128d, 1)
test_2 (_mm_cmp_ss_mask, __mmask8, __m128, __m128, 1)
#ifdef __x86_64__
+test_2 (_mm_cvt_roundi64_sd, __m128d, __m128d, long long, 1)
+test_2 (_mm_cvt_roundi64_ss, __m128, __m128, long long, 1)
#endif
+test_2 (_mm_cvt_roundsd_ss, __m128, __m128, __m128d, 1)
+test_2 (_mm_cvt_roundss_sd, __m128d, __m128d, __m128, 5)
+test_2 (_mm_cvt_roundu32_ss, __m128, __m128, unsigned, 1)
#ifdef __x86_64__
+test_2 (_mm_cvt_roundu64_sd, __m128d, __m128d, unsigned long long, 1)
+test_2 (_mm_cvt_roundu64_ss, __m128, __m128, unsigned long long, 1)
#endif
+test_2 (_mm_div_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_div_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_getexp_round_sd, __m128d, __m128d, __m128d, 5)
+test_2 (_mm_getexp_round_ss, __m128, __m128, __m128, 5)
+test_2y (_mm_getmant_round_sd, __m128d, __m128d, __m128d, 1, 1, 5)
+test_2y (_mm_getmant_round_ss, __m128, __m128, __m128, 1, 1, 5)
+test_2 (_mm_mul_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_mul_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_scalef_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_scalef_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_sqrt_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_sqrt_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_sub_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_sub_round_ss, __m128, __m128, __m128, 1)
test_2x (_mm512_cmp_round_pd_mask, __mmask8, __m512d, __m512d, 1, 5)
test_2x (_mm512_cmp_round_ps_mask, __mmask16, __m512, __m512, 1, 5)
test_2x (_mm512_maskz_roundscale_round_pd, __m512d, __mmask8, __m512d, 1, 5)
test_2x (_mm512_maskz_roundscale_round_ps, __m512, __mmask16, __m512, 1, 5)
+test_2x (_mm_cmp_round_sd_mask, __mmask8, __m128d, __m128d, 1, 5)
+test_2x (_mm_cmp_round_ss_mask, __mmask8, __m128, __m128, 1, 5)
+test_2x (_mm_comi_round_sd, int, __m128d, __m128d, 1, 5)
+test_2x (_mm_comi_round_ss, int, __m128, __m128, 1, 5)
+test_2x (_mm_roundscale_round_sd, __m128d, __m128d, __m128d, 1, 5)
+test_2x (_mm_roundscale_round_ss, __m128, __m128, __m128, 1, 5)
test_3 (_mm512_fmadd_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
test_3 (_mm512_fmadd_round_ps, __m512, __m512, __m512, __m512, 1)
test_3 (_mm512_fmaddsub_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
@@ -373,6 +403,14 @@ test_3 (_mm512_maskz_sub_round_pd, __m512d, __mmask8, __m512d, __m512d, 1)
test_3 (_mm512_maskz_sub_round_ps, __m512, __mmask16, __m512, __m512, 1)
test_3 (_mm512_ternarylogic_epi32, __m512i, __m512i, __m512i, __m512i, 1)
test_3 (_mm512_ternarylogic_epi64, __m512i, __m512i, __m512i, __m512i, 1)
+test_3 (_mm_fmadd_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
+test_3 (_mm_fmadd_round_ss, __m128, __m128, __m128, __m128, 1)
+test_3 (_mm_fmsub_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
+test_3 (_mm_fmsub_round_ss, __m128, __m128, __m128, __m128, 1)
+test_3 (_mm_fnmadd_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
+test_3 (_mm_fnmadd_round_ss, __m128, __m128, __m128, __m128, 1)
+test_3 (_mm_fnmsub_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
+test_3 (_mm_fnmsub_round_ss, __m128, __m128, __m128, __m128, 1)
test_3 (_mm_mask_cmp_sd_mask, __mmask8, __mmask8, __m128d, __m128d, 1)
test_3 (_mm_mask_cmp_ss_mask, __mmask8, __mmask8, __m128, __m128, 1)
test_3v (_mm512_i32scatter_epi32, void *, __m512i, __m512i, 1)
@@ -385,6 +423,10 @@ test_3v (_mm512_i64scatter_pd, void *, __m512i, __m512d, 1)
test_3v (_mm512_i64scatter_ps, void *, __m512i, __m256, 1)
test_3x (_mm512_mask_roundscale_round_pd, __m512d, __m512d, __mmask8, __m512d, 1, 5)
test_3x (_mm512_mask_roundscale_round_ps, __m512, __m512, __mmask16, __m512, 1, 5)
+test_3x (_mm_fixupimm_round_sd, __m128d, __m128d, __m128d, __m128i, 1, 5)
+test_3x (_mm_fixupimm_round_ss, __m128, __m128, __m128, __m128i, 1, 5)
+test_3x (_mm_mask_cmp_round_sd_mask, __mmask8, __mmask8, __m128d, __m128d, 1, 5)
+test_3x (_mm_mask_cmp_round_ss_mask, __mmask8, __mmask8, __m128, __m128, 1, 5)
test_4 (_mm512_mask3_fmadd_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
test_4 (_mm512_mask3_fmadd_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
test_4 (_mm512_mask3_fmaddsub_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
@@ -471,6 +513,10 @@ test_4x (_mm512_mask_fixupimm_round_pd, __m512d, __m512d, __mmask8, __m512d, __m
test_4x (_mm512_mask_fixupimm_round_ps, __m512, __m512, __mmask16, __m512, __m512i, 1, 5)
test_4x (_mm512_maskz_fixupimm_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512i, 1, 5)
test_4x (_mm512_maskz_fixupimm_round_ps, __m512, __mmask16, __m512, __m512, __m512i, 1, 5)
+test_4x (_mm_mask_fixupimm_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128i, 1, 5)
+test_4x (_mm_mask_fixupimm_round_ss, __m128, __m128, __mmask8, __m128, __m128i, 1, 5)
+test_4x (_mm_maskz_fixupimm_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128i, 1, 5)
+test_4x (_mm_maskz_fixupimm_round_ss, __m128, __mmask8, __m128, __m128, __m128i, 1, 5)
/* avx512pfintrin.h */
test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 0123538..a6a7b39 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -186,6 +186,8 @@
/* avx512fintrin.h */
#define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 1)
+#define __builtin_ia32_addss_round(A, B, C) __builtin_ia32_addss_round(A, B, 1)
#define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E)
#define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E)
#define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D)
@@ -201,6 +203,8 @@
#define __builtin_ia32_cvtps2dq512_mask(A, B, C, D) __builtin_ia32_cvtps2dq512_mask(A, B, C, 1)
#define __builtin_ia32_cvtps2pd512_mask(A, B, C, D) __builtin_ia32_cvtps2pd512_mask(A, B, C, 5)
#define __builtin_ia32_cvtps2udq512_mask(A, B, C, D) __builtin_ia32_cvtps2udq512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtsd2ss_round(A, B, C) __builtin_ia32_cvtsd2ss_round(A, B, 1)
+#define __builtin_ia32_cvtss2sd_round(A, B, C) __builtin_ia32_cvtss2sd_round(A, B, 4)
#define __builtin_ia32_cvtsi2sd64(A, B, C) __builtin_ia32_cvtsi2sd64(A, B, 1)
#define __builtin_ia32_cvtsi2ss32(A, B, C) __builtin_ia32_cvtsi2ss32(A, B, 1)
#define __builtin_ia32_cvtsi2ss64(A, B, C) __builtin_ia32_cvtsi2ss64(A, B, 1)
@@ -214,6 +218,8 @@
#define __builtin_ia32_cvtusi2ss64(A, B, C) __builtin_ia32_cvtusi2ss64(A, B, 1)
#define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_divsd_round(A, B, C) __builtin_ia32_divsd_round(A, B, 1)
+#define __builtin_ia32_divss_round(A, B, C) __builtin_ia32_divss_round(A, B, 1)
#define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D)
#define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D)
#define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D)
@@ -236,18 +242,28 @@
#define __builtin_ia32_gathersiv8di(A, B, C, D, F) __builtin_ia32_gathersiv8di(A, B, C, D, 1)
#define __builtin_ia32_getexppd512_mask(A, B, C, D) __builtin_ia32_getexppd512_mask(A, B, C, 5)
#define __builtin_ia32_getexpps512_mask(A, B, C, D) __builtin_ia32_getexpps512_mask(A, B, C, 5)
+#define __builtin_ia32_getexpsd128_round(A, B, C) __builtin_ia32_getexpsd128_round(A, B, 4)
+#define __builtin_ia32_getexpss128_round(A, B, C) __builtin_ia32_getexpss128_round(A, B, 4)
#define __builtin_ia32_getmantpd512_mask(A, F, C, D, E) __builtin_ia32_getmantpd512_mask(A, 1, C, D, 5)
#define __builtin_ia32_getmantps512_mask(A, F, C, D, E) __builtin_ia32_getmantps512_mask(A, 1, C, D, 5)
+#define __builtin_ia32_getmantsd_round(A, B, C, D) __builtin_ia32_getmantsd_round(A, B, 1, 4)
+#define __builtin_ia32_getmantss_round(A, B, C, D) __builtin_ia32_getmantss_round(A, B, 1, 4)
#define __builtin_ia32_insertf32x4_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_mask(A, B, 1, D, E)
#define __builtin_ia32_insertf64x4_mask(A, B, F, D, E) __builtin_ia32_insertf64x4_mask(A, B, 1, D, E)
#define __builtin_ia32_inserti32x4_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_mask(A, B, 1, D, E)
#define __builtin_ia32_inserti64x4_mask(A, B, F, D, E) __builtin_ia32_inserti64x4_mask(A, B, 1, D, E)
#define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 5)
#define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 5)
+#define __builtin_ia32_maxsd_round(A, B, C) __builtin_ia32_maxsd_round(A, B, 4)
+#define __builtin_ia32_maxss_round(A, B, C) __builtin_ia32_maxss_round(A, B, 4)
#define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 5)
#define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 5)
+#define __builtin_ia32_minsd_round(A, B, C) __builtin_ia32_minsd_round(A, B, 4)
+#define __builtin_ia32_minss_round(A, B, C) __builtin_ia32_minss_round(A, B, 4)
#define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_mulsd_round(A, B, C) __builtin_ia32_mulsd_round(A, B, 1)
+#define __builtin_ia32_mulss_round(A, B, C) __builtin_ia32_mulss_round(A, B, 1)
#define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D)
#define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D)
#define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D)
@@ -267,8 +283,12 @@
#define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E)
#define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 5)
#define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 5)
+#define __builtin_ia32_rndscalesd_round(A, B, C, D) __builtin_ia32_rndscalesd_round(A, B, 1, 4)
+#define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4)
#define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_scalefsd_round(A, B, C) __builtin_ia32_scalefsd_round(A, B, 1)
+#define __builtin_ia32_scalefss_round(A, B, C) __builtin_ia32_scalefss_round(A, B, 1)
#define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 1)
#define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 1)
#define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 1)
@@ -285,8 +305,12 @@
#define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E)
#define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 1)
#define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 1)
+#define __builtin_ia32_sqrtss_round(A, B, C) __builtin_ia32_sqrtss_round(A, B, 1)
+#define __builtin_ia32_sqrtsd_round(A, B, C) __builtin_ia32_sqrtsd_round(A, B, 1)
#define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_subsd_round(A, B, C) __builtin_ia32_subsd_round(A, B, 1)
+#define __builtin_ia32_subss_round(A, B, C) __builtin_ia32_subss_round(A, B, 1)
#define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D)
#define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D)
#define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 5)
@@ -315,12 +339,8 @@
#define __builtin_ia32_vfmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask(A, B, C, D, 1)
#define __builtin_ia32_vfmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddps512_maskz(A, B, C, D, 1)
-#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 1)
-#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 1)
-#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 1)
-#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 1)
-#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 1)
-#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsd3_round(A, B, C, D) __builtin_ia32_vfmaddsd3_round(A, B, C, 1)
+#define __builtin_ia32_vfmaddss3_round(A, B, C, D) __builtin_ia32_vfmaddss3_round(A, B, C, 1)
#define __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, 1)
@@ -331,8 +351,6 @@
#define __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubps512_mask3(A, B, C, D, 1)
-#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 1)
-#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_vfnmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddps512_mask(A, B, C, D, 1)
#define __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, 1)
diff --git a/gcc/testsuite/gcc.target/i386/testimm-10.c b/gcc/testsuite/gcc.target/i386/testimm-10.c
index 0699787..d744e1c 100644
--- a/gcc/testsuite/gcc.target/i386/testimm-10.c
+++ b/gcc/testsuite/gcc.target/i386/testimm-10.c
@@ -77,7 +77,13 @@ test8bit (void)
m512 = _mm512_mask_fixupimm_ps (m512, mmask16, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m512 = _mm512_maskz_fixupimm_ps (mmask16, m512, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
+ m128d = _mm_fixupimm_sd (m128d, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
+ m128d = _mm_mask_fixupimm_sd (m128d, mmask8, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
+ m128d = _mm_maskz_fixupimm_sd (mmask8, m128d, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
+ m128 = _mm_fixupimm_ss (m128, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
+ m128 = _mm_mask_fixupimm_ss (m128, mmask8, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
+ m128 = _mm_maskz_fixupimm_ss (mmask8, m128, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m512i = _mm512_rol_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
m512i = _mm512_mask_rol_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
@@ -107,6 +113,8 @@ test8bit (void)
m512 = _mm512_mask_roundscale_ps (m512, mmask16, m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m512 = _mm512_maskz_roundscale_ps (mmask16, m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
+ m128d = _mm_roundscale_sd (m128d, m128d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
+ m128 = _mm_roundscale_ss (m128, m128, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m512i = _mm512_alignr_epi32 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
m512i = _mm512_mask_alignr_epi32 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
@@ -179,5 +187,6 @@ test4bit (void) {
m512 = _mm512_mask_getmant_ps (m512, mmask16, m512, 1, 64); /* { dg-error "the immediate argument must be a 4-bit immediate" } */
m512 = _mm512_maskz_getmant_ps (mmask16, m512, 1, 64); /* { dg-error "the immediate argument must be a 4-bit immediate" } */
-
+ m128d = _mm_getmant_sd (m128d, m128d, 1, 64); /* { dg-error "the immediate argument must be a 4-bit immediate" } */
+ m128 = _mm_getmant_ss (m128, m128, 1, 64); /* { dg-error "the immediate argument must be a 4-bit immediate" } */
}
diff --git a/gcc/testsuite/gcc.target/i386/testround-1.c b/gcc/testsuite/gcc.target/i386/testround-1.c
index 2c8fe2b..20c039a 100644
--- a/gcc/testsuite/gcc.target/i386/testround-1.c
+++ b/gcc/testsuite/gcc.target/i386/testround-1.c
@@ -19,12 +19,19 @@ __mmask16 mmask16;
void
test_round (void)
{
+ m128d = _mm_add_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_add_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_sub_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_sub_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+
m512d = _mm512_sqrt_round_pd (m512d, 7); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_mask_sqrt_round_pd (m512d, mmask8, m512d, 7); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_maskz_sqrt_round_pd (mmask8, m512d, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_sqrt_round_ps (m512, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_mask_sqrt_round_ps (m512, mmask16, m512, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_maskz_sqrt_round_ps (mmask16, m512, 7); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_sqrt_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_sqrt_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_add_round_pd (m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_mask_add_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
@@ -51,6 +58,10 @@ test_round (void)
m512 = _mm512_div_round_ps (m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_mask_div_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_maskz_div_round_ps (mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_mul_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_mul_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_div_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_div_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_scalef_round_pd(m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_mask_scalef_round_pd(m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
@@ -58,6 +69,8 @@ test_round (void)
m512 = _mm512_scalef_round_ps(m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_mask_scalef_round_ps(m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_maskz_scalef_round_ps(mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_scalef_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_scalef_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_fmadd_round_pd (m512d, m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_mask_fmadd_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
@@ -141,6 +154,16 @@ test_round (void)
m256 = _mm512_cvt_roundpd_ps (m512d, 7); /* { dg-error "incorrect rounding operand" } */
m256 = _mm512_mask_cvt_roundpd_ps (m256, mmask8, m512d, 7); /* { dg-error "incorrect rounding operand" } */
m256 = _mm512_maskz_cvt_roundpd_ps (mmask8, m512d, 7); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_cvt_roundsd_ss (m128, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+
+ m128d = _mm_fmadd_round_sd (m128d, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_fmadd_round_ss (m128, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_fmsub_round_sd (m128d, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_fmsub_round_ss (m128, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_fnmadd_round_sd (m128d, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_fnmadd_round_ss (m128, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_fnmsub_round_sd (m128d, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_fnmsub_round_ss (m128, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_max_round_pd (m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_mask_max_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
@@ -195,6 +218,10 @@ test_round (void)
m512 = _mm512_mask_cvt_roundph_ps (m512, mmask16, m256i, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_maskz_cvt_roundph_ps (mmask16, m256i, 7); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_cvt_roundss_sd (m128d, m128, 7); /* { dg-error "incorrect rounding operand" } */
+
+ m128 = _mm_getexp_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_getexp_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_getexp_round_ps (m512, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_mask_getexp_round_ps (m512, mmask16, m512, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_maskz_getexp_round_ps (mmask16, m512, 7); /* { dg-error "incorrect rounding operand" } */
@@ -207,6 +234,8 @@ test_round (void)
m512 = _mm512_getmant_round_ps (m512, 0, 0, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_mask_getmant_round_ps (m512, mmask16, m512, 0, 0, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_maskz_getmant_round_ps (mmask16, m512, 0, 0, 7); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_getmant_round_sd (m128d, m128d, 0, 0, 7); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_getmant_round_ss (m128, m128, 0, 0, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_roundscale_round_ps (m512, 4, 7); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_mask_roundscale_round_ps (m512, mmask16, m512, 4, 7); /* { dg-error "incorrect rounding operand" } */
@@ -214,6 +243,8 @@ test_round (void)
m512d = _mm512_roundscale_round_pd (m512d, 4, 7); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_mask_roundscale_round_pd (m512d, mmask8, m512d, 4, 7); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_maskz_roundscale_round_pd (mmask8, m512d, 4, 7); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_roundscale_round_ss (m128, m128, 4, 7); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_roundscale_round_sd (m128d, m128d, 4, 7); /* { dg-error "incorrect rounding operand" } */
mmask8 = _mm512_cmp_round_pd_mask (m512d, m512d, 4, 7); /* { dg-error "incorrect rounding operand" } */
mmask16 = _mm512_cmp_round_ps_mask (m512, m512, 4, 7); /* { dg-error "incorrect rounding operand" } */
@@ -231,12 +262,19 @@ test_round (void)
void
test_round_sae (void)
{
+ m128d = _mm_add_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_add_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_sub_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_sub_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+
m512d = _mm512_sqrt_round_pd (m512d, 5); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_mask_sqrt_round_pd (m512d, mmask8, m512d, 5); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_maskz_sqrt_round_pd (mmask8, m512d, 5); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_sqrt_round_ps (m512, 5); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_mask_sqrt_round_ps (m512, mmask16, m512, 5); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_maskz_sqrt_round_ps (mmask16, m512, 5); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_sqrt_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_sqrt_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_add_round_pd (m512d, m512d, 5); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_mask_add_round_pd (m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand" } */
@@ -263,6 +301,10 @@ test_round_sae (void)
m512 = _mm512_div_round_ps (m512, m512, 5); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_mask_div_round_ps (m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_maskz_div_round_ps (mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_mul_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_mul_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_div_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_div_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_scalef_round_pd(m512d, m512d, 5); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_mask_scalef_round_pd(m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand" } */
@@ -270,6 +312,8 @@ test_round_sae (void)
m512 = _mm512_scalef_round_ps(m512, m512, 5); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_mask_scalef_round_ps(m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_maskz_scalef_round_ps(mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_scalef_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_scalef_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_fmadd_round_pd (m512d, m512d, m512d, 5); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_mask_fmadd_round_pd (m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand" } */
@@ -353,6 +397,16 @@ test_round_sae (void)
m256 = _mm512_cvt_roundpd_ps (m512d, 5); /* { dg-error "incorrect rounding operand" } */
m256 = _mm512_mask_cvt_roundpd_ps (m256, mmask8, m512d, 5); /* { dg-error "incorrect rounding operand" } */
m256 = _mm512_maskz_cvt_roundpd_ps (mmask8, m512d, 5); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_cvt_roundsd_ss (m128, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+
+ m128d = _mm_fmadd_round_sd (m128d, m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_fmadd_round_ss (m128, m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_fmsub_round_sd (m128d, m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_fmsub_round_ss (m128, m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_fnmadd_round_sd (m128d, m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_fnmadd_round_ss (m128, m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_fnmsub_round_sd (m128d, m128d, m128d, 5); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_fnmsub_round_ss (m128, m128, m128, 5); /* { dg-error "incorrect rounding operand" } */
}
void
@@ -411,6 +465,10 @@ test_sae_only (void)
m512 = _mm512_mask_cvt_roundph_ps (m512, mmask16, m256i, 3); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_maskz_cvt_roundph_ps (mmask16, m256i, 3); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_cvt_roundss_sd (m128d, m128, 3); /* { dg-error "incorrect rounding operand" } */
+
+ m128 = _mm_getexp_round_ss (m128, m128, 3); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_getexp_round_sd (m128d, m128d, 3); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_getexp_round_ps (m512, 3); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_mask_getexp_round_ps (m512, mmask16, m512, 3); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_maskz_getexp_round_ps (mmask16, m512, 3); /* { dg-error "incorrect rounding operand" } */
@@ -423,12 +481,17 @@ test_sae_only (void)
m512 = _mm512_getmant_round_ps (m512, 0, 0, 3); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_mask_getmant_round_ps (m512, mmask16, m512, 0, 0, 3); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_maskz_getmant_round_ps (mmask16, m512, 0, 0, 3); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_getmant_round_sd (m128d, m128d, 0, 0, 3); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_getmant_round_ss (m128, m128, 0, 0, 3); /* { dg-error "incorrect rounding operand" } */
+
m512 = _mm512_roundscale_round_ps (m512, 4, 3); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_mask_roundscale_round_ps (m512, mmask16, m512, 4, 3); /* { dg-error "incorrect rounding operand" } */
m512 = _mm512_maskz_roundscale_round_ps (mmask16, m512, 4, 3); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_roundscale_round_pd (m512d, 4, 3); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_mask_roundscale_round_pd (m512d, mmask8, m512d, 4, 3); /* { dg-error "incorrect rounding operand" } */
m512d = _mm512_maskz_roundscale_round_pd (mmask8, m512d, 4, 3); /* { dg-error "incorrect rounding operand" } */
+ m128 = _mm_roundscale_round_ss (m128, m128, 4, 3); /* { dg-error "incorrect rounding operand" } */
+ m128d = _mm_roundscale_round_sd (m128d, m128d, 4, 3); /* { dg-error "incorrect rounding operand" } */
mmask8 = _mm512_cmp_round_pd_mask (m512d, m512d, 4, 3); /* { dg-error "incorrect rounding operand" } */
mmask16 = _mm512_cmp_round_ps_mask (m512, m512, 4, 3); /* { dg-error "incorrect rounding operand" } */