author     Olga Makhotina <olga.makhotina@intel.com>	2018-02-12 06:09:20 +0000
committer  Kirill Yukhin <kyukhin@gcc.gnu.org>	2018-02-12 06:09:20 +0000
commit     158061a65bf2e68e44bc2f9622be41f57b0a47d0 (patch)
tree       1c646ed6aa491b5290aa15e8cc7197e9aa553219
parent     ba43b9fadd8b064023ab172db5a6e73027c8be44 (diff)
Add missing mask[z]_scalef_round_s[d,s] intrinsics
gcc/
	* config/i386/avx512fintrin.h (_mm_mask_scalef_round_sd,
	_mm_maskz_scalef_round_sd, _mm_mask_scalef_round_ss,
	_mm_maskz_scalef_round_ss): New intrinsics.
	(__builtin_ia32_scalefsd_round, __builtin_ia32_scalefss_round): Fix.
	* config/i386/i386-builtin.def (__builtin_ia32_scalefsd_round,
	__builtin_ia32_scalefss_round): Remove.
	(__builtin_ia32_scalefsd_mask_round,
	__builtin_ia32_scalefss_mask_round): New intrinsics.
	* config/i386/sse.md (vmscalef<mode><round_name>): Renamed to ...
	(vmscalef<mode><mask_scalar_name><round_scalar_name>): ... this.
	((match_operand:VF_128 2 "<round_nimm_predicate>"
	"<round_constraint>")): Changed to ...
	((match_operand:VF_128 2 "<round_scalar_nimm_predicate>"
	"<round_scalar_constraint>")): ... this.
	("vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|
	%0, %1, %2<round_op3>}"): Changed to ...
	("vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1,
	%0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1,
	%2<round_scalar_mask_op3>}"): ... this.
	* config/i386/subst.md (round_scalar_nimm_predicate): New.

gcc/testsuite/
	* gcc.target/i386/avx512f-vscalefsd-1.c (_mm_mask_scalef_round_sd,
	_mm_maskz_scalef_round_sd): Test new intrinsics.
	* gcc.target/i386/avx512f-vscalefsd-2.c (_mm_scalef_round_sd,
	_mm_mask_scalef_round_sd, _mm_maskz_scalef_round_sd): Test new
	intrinsics.
	* gcc.target/i386/avx512f-vscalefss-1.c (_mm_mask_scalef_round_ss,
	_mm_maskz_scalef_round_ss): Test new intrinsics.
	* gcc.target/i386/avx512f-vscalefss-2.c (_mm_scalef_round_ss,
	_mm_mask_scalef_round_ss, _mm_maskz_scalef_round_ss): Test new
	intrinsics.
	* gcc.target/i386/avx-1.c (__builtin_ia32_scalefsd_round,
	__builtin_ia32_scalefss_round): Remove builtin.
	(__builtin_ia32_scalefsd_mask_round,
	__builtin_ia32_scalefss_mask_round): Test new builtin.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.

From-SVN: r257578
-rw-r--r--	gcc/ChangeLog	23
-rw-r--r--	gcc/config/i386/avx512fintrin.h	85
-rw-r--r--	gcc/config/i386/i386-builtin.def	4
-rw-r--r--	gcc/config/i386/sse.md	6
-rw-r--r--	gcc/config/i386/subst.md	1
-rw-r--r--	gcc/testsuite/ChangeLog	19
-rw-r--r--	gcc/testsuite/gcc.target/i386/avx-1.c	4
-rw-r--r--	gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c	7
-rw-r--r--	gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c	28
-rw-r--r--	gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c	6
-rw-r--r--	gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c	28
-rw-r--r--	gcc/testsuite/gcc.target/i386/sse-13.c	4
-rw-r--r--	gcc/testsuite/gcc.target/i386/sse-23.c	4

13 files changed, 192 insertions, 27 deletions
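
The intrinsics added by this patch follow the usual AVX-512 merge-/zero-masking pattern: the _mm_mask_* form keeps the element of the source operand __W where the mask bit is clear, while the _mm_maskz_* form zeroes that element instead.  A minimal usage sketch (not part of the patch; the values, mask and printed output are illustrative only) could look like the following, built with -mavx512f on an AVX-512F capable target:

#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m128d a = _mm_set_sd (3.0);   /* value to scale */
  __m128d b = _mm_set_sd (2.0);   /* exponent operand: scalef computes a * 2^floor(b) */
  __m128d w = _mm_set_sd (-1.0);  /* merge source for the mask form */
  __mmask8 m = 0;                 /* bit 0 clear: element 0 is not computed */

  /* Merge-masking: element 0 comes from w because bit 0 of m is clear.  */
  __m128d merged = _mm_mask_scalef_round_sd (w, m, a, b,
					     _MM_FROUND_TO_NEAREST_INT
					     | _MM_FROUND_NO_EXC);

  /* Zero-masking: element 0 is zeroed because bit 0 of m is clear.  */
  __m128d zeroed = _mm_maskz_scalef_round_sd (m, a, b,
					      _MM_FROUND_TO_NEAREST_INT
					      | _MM_FROUND_NO_EXC);

  printf ("merged = %f, zeroed = %f\n",
	  _mm_cvtsd_f64 (merged), _mm_cvtsd_f64 (zeroed));
  return 0;
}

The _ss variants (_mm_mask_scalef_round_ss, _mm_maskz_scalef_round_ss) take __m128 operands but are otherwise used the same way.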
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ac1803e..b29822b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,28 @@
2018-02-12 Olga Makhotina <olga.makhotina@intel.com>
+ * config/i386/avx512fintrin.h (_mm_mask_scalef_round_sd,
+ _mm_maskz_scalef_round_sd, _mm_mask_scalef_round_ss,
+ _mm_maskz_scalef_round_ss): New intrinsics.
+ (__builtin_ia32_scalefsd_round, __builtin_ia32_scalefss_round): Fix.
+ * config/i386/i386-builtin.def (__builtin_ia32_scalefsd_round,
+ __builtin_ia32_scalefss_round): Remove.
+ (__builtin_ia32_scalefsd_mask_round,
+ __builtin_ia32_scalefss_mask_round): New intrinsics.
+ * config/i386/sse.md (vmscalef<mode><round_name>): Renamed to ...
+ (vmscalef<mode><mask_scalar_name><round_scalar_name>): ... this.
+ ((match_operand:VF_128 2 "<round_nimm_predicate>"
+ "<round_constraint>")): Changed to ...
+ ((match_operand:VF_128 2 "<round_scalar_nimm_predicate>"
+ "<round_scalar_constraint>")): ... this.
+ ("vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|
+ %0, %1, %2<round_op3>}"): Changed to ...
+ ("vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1,
+ %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1,
+ %2<round_scalar_mask_op3>}"): ... this.
+ * config/i386/subst.md (round_scalar_nimm_predicate): New.
+
+2018-02-12 Olga Makhotina <olga.makhotina@intel.com>
+
* config/i386/avx512fintrin.h (_mm_mask_sqrt_round_sd)
(_mm_maskz_sqrt_round_sd, _mm_mask_sqrt_round_ss)
(_mm_maskz_sqrt_round_ss): New intrinsics.
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index ffbb1d9..ba65aca 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -3103,18 +3103,67 @@ extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
{
- return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
- (__v2df) __B,
- __R);
+ return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
{
- return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
- (__v4sf) __B,
- __R);
+ return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
}
#else
#define _mm512_scalef_round_pd(A, B, C) \
@@ -3136,10 +3185,12 @@ _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
(__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm_scalef_round_sd(A, B, C) \
- (__m128d)__builtin_ia32_scalefsd_round(A, B, C)
+ (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
+ (__v2df)_mm_setzero_pd (), -1, C)
#define _mm_scalef_round_ss(A, B, C) \
- (__m128)__builtin_ia32_scalefss_round(A, B, C)
+ (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
+ (__v4sf)_mm_setzero_ps (), -1, C)
#endif
#ifdef __OPTIMIZE__
@@ -12182,18 +12233,24 @@ extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_sd (__m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
- (__v2df) __B,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_ss (__m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
- (__v4sf) __B,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 5061042..169189d 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -2718,8 +2718,8 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_mask_round, "__builtin_ia32_scalefsd_mask_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_mask_round, "__builtin_ia32_scalefss_mask_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_mask_round, "__builtin_ia32_sqrtsd_mask_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 47687a6..da9af23 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8318,17 +8318,17 @@
operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
})
-(define_insn "avx512f_vmscalef<mode><round_name>"
+(define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
[(match_operand:VF_128 1 "register_operand" "v")
- (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
+ (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
UNSPEC_SCALEF)
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
+ "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 9ed2e8d..3f63865 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -271,6 +271,7 @@
(define_subst_attr "round_scalar_mask_op3" "round_scalar" "" "<round_scalar_mask_operand3>")
(define_subst_attr "round_scalar_constraint" "round_scalar" "vm" "v")
(define_subst_attr "round_scalar_prefix" "round_scalar" "vex" "evex")
+(define_subst_attr "round_scalar_nimm_predicate" "round_scalar" "vector_operand" "register_operand")
(define_subst "round_scalar"
[(set (match_operand:SUBST_V 0)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9c53f99..1a76336 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,24 @@
2018-02-12 Olga Makhotina <olga.makhotina@intel.com>
+ * gcc.target/i386/avx512f-vscalefsd-1.c (_mm_mask_scalef_round_sd,
+ _mm_maskz_scalef_round_sd): Test new intrinsics.
+ * gcc.target/i386/avx512f-vscalefsd-2.c (_mm_scalef_round_sd,
+ _mm_mask_scalef_round_sd, _mm_maskz_scalef_round_sd): Test new
+ intrinsics.
+ * gcc.target/i386/avx512f-vscalefss-1.c (_mm_mask_scalef_round_ss,
+ _mm_maskz_scalef_round_ss): Test new intrinsics.
+ * gcc.target/i386/avx512f-vscalefss-2.c (_mm_scalef_round_ss,
+ _mm_mask_scalef_round_ss, _mm_maskz_scalef_round_ss): Test new
+ intrinsics.
+ * gcc.target/i386/avx-1.c (__builtin_ia32_scalefsd_round,
+ __builtin_ia32_scalefss_round): Remove builtin.
+ (__builtin_ia32_scalefsd_mask_round,
+ __builtin_ia32_scalefss_mask_round): Test new builtin.
+ * gcc.target/i386/sse-13.c: Ditto.
+ * gcc.target/i386/sse-23.c: Ditto.
+
+2018-02-12 Olga Makhotina <olga.makhotina@intel.com>
+
* gcc.target/i386/avx512f-vsqrtsd-1.c (_mm_mask_sqrt_round_sd)
(_mm_maskz_sqrt_round_sd): Test new intrinsics.
* gcc.target/i386/avx512f-vsqrtsd-2.c (_mm_sqrt_round_sd)
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index acfc85b..c877f99 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -287,8 +287,8 @@
#define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4)
#define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 8)
#define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 8)
-#define __builtin_ia32_scalefsd_round(A, B, C) __builtin_ia32_scalefsd_round(A, B, 8)
-#define __builtin_ia32_scalefss_round(A, B, C) __builtin_ia32_scalefss_round(A, B, 8)
+#define __builtin_ia32_scalefsd_mask_round(A, B, C, D, E) __builtin_ia32_scalefsd_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_scalefss_mask_round(A, B, C, D, E) __builtin_ia32_scalefss_mask_round(A, B, C, D, 8)
#define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 8)
#define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 8)
#define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 8)
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c
index c883192..09bc5c6 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c
@@ -1,14 +1,21 @@
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
#include <immintrin.h>
volatile __m128d x;
+volatile __mmask8 m;
void extern
avx512f_test (void)
{
x = _mm_scalef_sd (x, x);
x = _mm_scalef_round_sd (x, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x = _mm_mask_scalef_round_sd (x, m, x, x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+ x = _mm_maskz_scalef_round_sd (m, x, x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c
index 28738f7..afe73dc 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c
@@ -6,6 +6,7 @@
#include "avx512f-check.h"
#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
static void
compute_scalefsd (double *s1, double *s2, double *r)
@@ -17,20 +18,45 @@ compute_scalefsd (double *s1, double *s2, double *r)
void static
avx512f_test (void)
{
- union128d res1, s1, s2;
+ union128d res1, res2, res3, res4;
+ union128d s1, s2;
double res_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
int i;
for (i = 0; i < SIZE; i++)
{
s1.a[i] = 11.5 * (i + 1);
s2.a[i] = 10.5 * (i + 1);
+ res_ref[i] = 9.5 * (i + 1);
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ res4.a[i] = DEFAULT_VALUE;
}
res1.x = _mm_scalef_sd (s1.x, s2.x);
+ res2.x = _mm_scalef_round_sd (s1.x, s2.x,
+ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ res3.x = _mm_mask_scalef_round_sd (s1.x, mask, s1.x, s2.x,
+ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ res4.x = _mm_maskz_scalef_round_sd (mask, s1.x, s2.x,
+ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
compute_scalefsd (s1.a, s2.a, res_ref);
if (check_union128d (res1, res_ref))
abort ();
+ if (check_union128d (res2, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+
+ if (check_union128d (res3, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+
+ if (check_union128d (res4, res_ref))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c
index f59525f..d1af336 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c
@@ -1,14 +1,20 @@
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
volatile __m128 x;
+volatile __mmask8 m;
void extern
avx512f_test (void)
{
x = _mm_scalef_ss (x, x);
x = _mm_scalef_round_ss (x, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x = _mm_mask_scalef_round_ss (x, m, x, x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+ x = _mm_maskz_scalef_round_ss (m, x, x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c
index 9356184..811ff15 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c
@@ -6,6 +6,7 @@
#include "avx512f-check.h"
#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
static void
compute_scalefss (float *s1, float *s2, float *r)
@@ -19,20 +20,45 @@ compute_scalefss (float *s1, float *s2, float *r)
static void
avx512f_test (void)
{
- union128 res1, s1, s2;
+ union128 res1, res2, res3, res4;
+ union128 s1, s2;
float res_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
int i;
for (i = 0; i < SIZE; i++)
{
s1.a[i] = 11.5 * (i + 1);
s2.a[i] = 10.5 * (i + 1);
+ res_ref[i] = 9.5 * (i + 1);
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ res4.a[i] = DEFAULT_VALUE;
}
res1.x = _mm_scalef_ss (s1.x, s2.x);
+ res2.x = _mm_scalef_round_ss (s1.x, s2.x,
+ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ res3.x = _mm_mask_scalef_round_ss (s1.x, mask, s1.x, s2.x,
+ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ res4.x = _mm_maskz_scalef_round_ss (mask, s1.x, s2.x,
+ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
compute_scalefss (s1.a, s2.a, res_ref);
if (check_union128 (res1, res_ref))
abort ();
+ if (check_union128 (res2, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, 1);
+
+ if (check_union128 (res3, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+
+ if (check_union128 (res4, res_ref))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 7b03199..b43f903 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -304,8 +304,8 @@
#define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4)
#define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 8)
#define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 8)
-#define __builtin_ia32_scalefsd_round(A, B, C) __builtin_ia32_scalefsd_round(A, B, 8)
-#define __builtin_ia32_scalefss_round(A, B, C) __builtin_ia32_scalefss_round(A, B, 8)
+#define __builtin_ia32_scalefsd_mask_round(A, B, C, D, E) __builtin_ia32_scalefsd_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_scalefss_mask_round(A, B, C, D, E) __builtin_ia32_scalefss_mask_round(A, B, C, D, 8)
#define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 8)
#define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 8)
#define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 8)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 0b18eec..8f93d65 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -305,8 +305,8 @@
#define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4)
#define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 8)
#define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 8)
-#define __builtin_ia32_scalefsd_round(A, B, C) __builtin_ia32_scalefsd_round(A, B, 8)
-#define __builtin_ia32_scalefss_round(A, B, C) __builtin_ia32_scalefss_round(A, B, 8)
+#define __builtin_ia32_scalefsd_mask_round(A, B, C, D, E) __builtin_ia32_scalefsd_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_scalefss_mask_round(A, B, C, D, E) __builtin_ia32_scalefss_mask_round(A, B, C, D, 8)
#define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 8)
#define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 8)
#define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 8)