diff options
author | Olga Makhotina <olga.makhotina@intel.com> | 2017-10-17 07:12:45 +0000 |
---|---|---|
committer | Kirill Yukhin <kyukhin@gcc.gnu.org> | 2017-10-17 07:12:45 +0000 |
commit | af297249647b4d5b783239916f27a4b68829f916 (patch) | |
tree | e7a8e456bd0708bb84cf89fe8c5b577f1e09f627 /gcc | |
parent | e7f168a7ffc18cfe593928543d6cf3ea5e674fc9 (diff) | |
download | gcc-af297249647b4d5b783239916f27a4b68829f916.zip gcc-af297249647b4d5b783239916f27a4b68829f916.tar.gz gcc-af297249647b4d5b783239916f27a4b68829f916.tar.bz2 |
Add missing REDUCE[SD,SS] intrinsics
gcc/
* config/i386/avx512dqintrin.h (_mm_mask_reduce_sd,
_mm_maskz_reduce_sd, _mm_mask_reduce_ss,
_mm_maskz_reduce_ss): New.
* config/i386/i386-builtin.def (__builtin_ia32_reducesd_mask,
__builtin_ia32_reducess_mask): Ditto.
(__builtin_ia32_reducesd, __builtin_ia32_reducess): Remove.
* config/i386/sse.md (reduces<mode>): Renamed to ...
(reduces<mode><mask_scalar_name>): ... this.
(vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}): Changed
to ...
(vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|
%0<mask_scalar_operand4>, %1, %2, %3}): ... this.
gcc/testsuite/
* gcc.target/i386/avx512dq-vreducesd-1.c (_mm_mask_reduce_sd,
_mm_maskz_reduce_sd): Test new intrinsics.
* gcc.target/i386/avx512dq-vreducesd-2.c: New.
* gcc.target/i386/avx512dq-vreducess-1.c (_mm_mask_reduce_ss,
_mm_maskz_reduce_ss): Test new intrinsics.
* gcc.target/i386/avx512dq-vreducess-2.c: New.
* gcc.target/i386/avx-1.c (__builtin_ia32_reducesd,
__builtin_ia32_reducess): Remove builtin.
(__builtin_ia32_reducesd_mask,
__builtin_ia32_reducess_mask): Test new builtin.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
From-SVN: r253803
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 15 | ||||
-rw-r--r-- | gcc/config/i386/avx512dqintrin.h | 85 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin.def | 4 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 4 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 15 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-1.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1.c | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-2.c | 66 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512dq-vreducess-2.c | 68 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-13.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-23.c | 4 |
12 files changed, 274 insertions, 20 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 933e4e9..3359646 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +17-10-2017 Olga Makhotina <olga.makhotina@intel.com> + + * config/i386/avx512dqintrin.h (_mm_mask_reduce_sd, + _mm_maskz_reduce_sd, _mm_mask_reduce_ss,=20 + _mm_maskz_reduce_ss): New. + * config/i386/i386-builtin.def (__builtin_ia32_reducesd_mask, + __builtin_ia32_reducess_mask): Ditto.. + (__builtin_ia32_reducesd, __builtin_ia32_reducess): Remove. + * config/i386/sse.md (reduces<mode>): Renamed to ... + (reduces<mode><mask_scalar_name>): ... this. + (vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}): Changed + to ... + (vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>| + %0<mask_scalar_operand4>, %1, %2, %3}): ... this. + 2017-10-16 David Malcolm <dmalcolm@redhat.com> * Makefile.in (OBJS): Add unique-ptr-tests.o. diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h index 88e8adb..8e887d8 100644 --- a/gcc/config/i386/avx512dqintrin.h +++ b/gcc/config/i386/avx512dqintrin.h @@ -1160,16 +1160,63 @@ extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_reduce_sd (__m128d __A, __m128d __B, int __C) { - return (__m128d) __builtin_ia32_reducesd ((__v2df) __A, - (__v2df) __B, __C); + return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) _mm_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, int __C) +{ + return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) __W, + (__mmask8) __U); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C) +{ + return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, + 
(__v2df) __B, __C, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_reduce_ss (__m128 __A, __m128 __B, int __C) { - return (__m128) __builtin_ia32_reducess ((__v4sf) __A, - (__v4sf) __B, __C); + return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) _mm_setzero_ps (), + (__mmask8) -1); +} + + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, int __C) +{ + return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) __W, + (__mmask8) __U); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C) +{ + return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U); } extern __inline __m128d @@ -2449,12 +2496,34 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) (int) (c),(__mmask8)-1)) #define _mm_reduce_sd(A, B, C) \ - ((__m128d) __builtin_ia32_reducesd ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C))) \ + ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ + (__mmask8)-1)) + +#define _mm_mask_reduce_sd(W, U, A, B, C) \ + ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), (__mmask8)(U))) + +#define _mm_maskz_reduce_sd(U, A, B, C) \ + ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ + (__mmask8)(U))) #define _mm_reduce_ss(A, B, C) \ - ((__m128) __builtin_ia32_reducess ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(A), (int)(C))) \ + ((__m128) __builtin_ia32_reducess_mask 
((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ + (__mmask8)-1)) + +#define _mm_mask_reduce_ss(W, U, A, B, C) \ + ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), (__mmask8)(U))) + +#define _mm_maskz_reduce_ss(U, A, B, C) \ + ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ + (__mmask8)(U))) + + #endif diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 0d5d5b7..4666a4e 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -1666,8 +1666,8 @@ BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df_mask, "__builtin_ia32_reducesd_mask", IX86_BUILTIN_REDUCESD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf_mask, "__builtin_ia32_reducess_mask", IX86_BUILTIN_REDUCESS128_MASK, UNKNOWN, (int) 
V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 0c26bd1..19b2c69 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2522,7 +2522,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn "reduces<mode>" +(define_insn "reduces<mode><mask_scalar_name>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 @@ -2533,7 +2533,7 @@ (match_dup 1) (const_int 1)))] "TARGET_AVX512DQ" - "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2, %3}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 33220d5..3059d23 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,18 @@ +17-10-2017 Olga Makhotina <olga.makhotina@intel.com> + + * gcc.target/i386/avx512dq-vreducesd-1.c (_mm_mask_reduce_sd, + _mm_maskz_reduce_sd): Test new intrinsics. + * gcc.target/i386/avx512dq-vreducesd-2.c: New. + * gcc.target/i386/avx512dq-vreducess-1.c (_mm_mask_reduce_ss, + _mm_maskz_reduce_ss): Test new intrinsics. + * gcc.target/i386/avx512dq-vreducess-2.c: New. 
+ * gcc.target/i386/avx-1.c (__builtin_ia32_reducesd, + __builtin_ia32_reducess): Remove builtin. + (__builtin_ia32_reducesd_mask, + __builtin_ia32_reducess_mask): Test new builtin. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + 2017-10-16 Martin Liska <mliska@suse.cz> * c-c++-common/ubsan/attrib-5.c (float_cast2): Fix warning scan diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index 085ba81..d03625b 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -412,8 +412,8 @@ /* avx512dqintrin.h */ #define __builtin_ia32_kshiftliqi(A, B) __builtin_ia32_kshiftliqi(A, 8) #define __builtin_ia32_kshiftriqi(A, B) __builtin_ia32_kshiftriqi(A, 8) -#define __builtin_ia32_reducess(A, B, F) __builtin_ia32_reducess(A, B, 1) -#define __builtin_ia32_reducesd(A, B, F) __builtin_ia32_reducesd(A, B, 1) +#define __builtin_ia32_reducess_mask(A, B, F, W, U) __builtin_ia32_reducess_mask(A, B, 1, W, U) +#define __builtin_ia32_reducesd_mask(A, B, F, W, U) __builtin_ia32_reducesd_mask(A, B, 1, W, U) #define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D) #define __builtin_ia32_reducepd512_mask(A, E, C, D) __builtin_ia32_reducepd512_mask(A, 1, C, D) #define __builtin_ia32_rangess128_round(A, B, I, F) __builtin_ia32_rangess128_round(A, B, 1, 8) diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1.c index b7549fa..b8f24a0c 100644 --- a/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1.c @@ -2,13 +2,24 @@ /* { dg-options "-mavx512dq -O2" } */ /* { dg-final { scan-assembler-times "vreducesd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducesd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { 
scan-assembler-times "vreducesd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducesd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + + #include <immintrin.h> +#define IMM 123 + volatile __m128d x1, x2; volatile __mmask8 m; void extern avx512dq_test (void) { - x1 = _mm_reduce_sd (x1, x2, 123); + x1 = _mm_reduce_sd (x1, x2, IMM); + + x1 = _mm_mask_reduce_sd(x1, m, x1, x2, IMM); + + x1 = _mm_maskz_reduce_sd(m, x1, x2, IMM); } diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-2.c new file mode 100644 index 0000000..93e1827 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-2.c @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512dq" } */ +/* { dg-require-effective-target avx512dq } */ + +#define AVX512DQ +#include "avx512f-helper.h" +#include <string.h> + +#define SIZE (AVX512F_LEN / 64) +#include "avx512f-mask-type.h" + +#define IMM 0x23 + +void +CALC (double *r, double *s) +{ + int i; + + memcpy (&r[1], &s[1], sizeof(double)); + + for (i = 0; i < 1; i++) + { + double tmp = (int) (4 * s[i]) / 4.0; + r[i] = s[i] - tmp; + } +} + +void +TEST (void) +{ + union128d res1, res2, res3; + union128d s1, s2, src; + double res_ref[2]; + MASK_TYPE mask = MASK_VALUE; + int j; + + for (j = 0; j < 2; j++) + { + s1.a[j] = j / 123.456; + s2.a[j] = j / 123.456; + res_ref[j] = j / 123.456; + res1.a[j] = DEFAULT_VALUE; + res2.a[j] = DEFAULT_VALUE; + res3.a[j] = DEFAULT_VALUE; + } + + res1.x = _mm_reduce_sd (s1.x, s2.x, IMM); + res2.x = _mm_mask_reduce_sd (s1.x, mask, s1.x, s2.x, IMM); + res3.x = _mm_maskz_reduce_sd (mask, s1.x, s2.x, IMM); + + CALC (res_ref, s2.a); + + if (check_union128d (res1, res_ref)) + abort (); + + MASK_MERGE (d) (res_ref, mask, 1); + + if (check_union128d (res2, res_ref)) + abort (); + + 
MASK_ZERO (d) (res_ref, mask, 1); + + if (check_union128d (res3, res_ref)) + abort (); + +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1.c index 2a6afe9..804074e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1.c @@ -2,13 +2,23 @@ /* { dg-options "-mavx512dq -O2" } */ /* { dg-final { scan-assembler-times "vreducess\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducess\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducess\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducess\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + #include <immintrin.h> +#define IMM 123 + volatile __m128 x1, x2; volatile __mmask8 m; void extern avx512dq_test (void) { - x1 = _mm_reduce_ss (x1, x2, 123); + x1 = _mm_reduce_ss (x1, x2, IMM); + + x1 = _mm_mask_reduce_ss (x1, m, x1, x2, IMM); + + x1 = _mm_maskz_reduce_ss (m, x1, x2, IMM); } diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-2.c new file mode 100644 index 0000000..8558c3b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-2.c @@ -0,0 +1,68 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512dq" } */ +/* { dg-require-effective-target avx512dq } */ + +#define AVX512DQ +#include "avx512f-helper.h" +#include <string.h> + +#define SIZE (AVX512F_LEN / 64) +#include "avx512f-mask-type.h" + +#define IMM 0x23 + +void +CALC (float *r, float *s) +{ + int i; + + memcpy (&r[1], &s[1], 2 * sizeof(float)); + + for (i = 0; i < 2; i++) + { + float tmp = (int) (4 * s[i]) / 4.0; + r[i] = s[i] - tmp; + } 
+} + +void +TEST (void) +{ + printf("\nsize = %d\n\n", SIZE); + + union128 res1, res2, res3; + union128 s1, s2, src; + float res_ref[4]; + MASK_TYPE mask = MASK_VALUE; + int j; + + for (j = 0; j < 4; j++) + { + s1.a[j] = j / 123.456; + s2.a[j] = j / 123.456; + res_ref[j] = j / 123.456; + res1.a[j] = DEFAULT_VALUE; + res2.a[j] = DEFAULT_VALUE; + res3.a[j] = DEFAULT_VALUE; + } + + res1.x = _mm_reduce_ss (s1.x, s2.x, IMM); + res2.x = _mm_mask_reduce_ss (s1.x, mask, s1.x, s2.x, IMM); + res3.x = _mm_maskz_reduce_ss (mask, s1.x, s2.x, IMM); + + CALC (res_ref, s2.a); + + if (check_union128 (res1, res_ref)) + abort (); + + MASK_MERGE () (res_ref, mask, 1); + + if (check_union128 (res2, res_ref)) + abort (); + + MASK_ZERO () (res_ref, mask, 1); + + if (check_union128 (res3, res_ref)) + abort (); + +} diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index c5c43b1..7ab2223 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -429,8 +429,8 @@ /* avx512dqintrin.h */ #define __builtin_ia32_kshiftliqi(A, B) __builtin_ia32_kshiftliqi(A, 8) #define __builtin_ia32_kshiftriqi(A, B) __builtin_ia32_kshiftriqi(A, 8) -#define __builtin_ia32_reducess(A, B, F) __builtin_ia32_reducess(A, B, 1) -#define __builtin_ia32_reducesd(A, B, F) __builtin_ia32_reducesd(A, B, 1) +#define __builtin_ia32_reducess_mask(A, B, F, W, U) __builtin_ia32_reducess_mask(A, B, 1, W, U) +#define __builtin_ia32_reducesd_mask(A, B, F, W, U) __builtin_ia32_reducesd_mask(A, B, 1, W, U) #define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D) #define __builtin_ia32_reducepd512_mask(A, E, C, D) __builtin_ia32_reducepd512_mask(A, 1, C, D) #define __builtin_ia32_rangess128_round(A, B, I, F) __builtin_ia32_rangess128_round(A, B, 1, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index fc339a5..3a90e54 100644 --- 
a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -428,8 +428,8 @@ /* avx512dqintrin.h */ #define __builtin_ia32_kshiftliqi(A, B) __builtin_ia32_kshiftliqi(A, 8) #define __builtin_ia32_kshiftriqi(A, B) __builtin_ia32_kshiftriqi(A, 8) -#define __builtin_ia32_reducess(A, B, F) __builtin_ia32_reducess(A, B, 1) -#define __builtin_ia32_reducesd(A, B, F) __builtin_ia32_reducesd(A, B, 1) +#define __builtin_ia32_reducess_mask(A, B, F, W, U) __builtin_ia32_reducess_mask(A, B, 1, W, U) +#define __builtin_ia32_reducesd_mask(A, B, F, W, U) __builtin_ia32_reducesd_mask(A, B, 1, W, U) #define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D) #define __builtin_ia32_reducepd512_mask(A, E, C, D) __builtin_ia32_reducepd512_mask(A, 1, C, D) #define __builtin_ia32_rangess128_round(A, B, I, F) __builtin_ia32_rangess128_round(A, B, 1, 8) |