diff options
author | Ilya Tocar <ilya.tocar@intel.com> | 2014-02-20 06:26:38 +0000 |
---|---|---|
committer | Kirill Yukhin <kyukhin@gcc.gnu.org> | 2014-02-20 06:26:38 +0000 |
commit | df62b4af2e7f921ee6adb2a70f62f6c3846139d0 (patch) | |
tree | a5db3a8897ee4547063dd5e963c11dbe4c4b9f1d | |
parent | a13cfd3e9bca793f3c6efd75eb10b654c3ebd41f (diff) | |
download | gcc-df62b4af2e7f921ee6adb2a70f62f6c3846139d0.zip gcc-df62b4af2e7f921ee6adb2a70f62f6c3846139d0.tar.gz gcc-df62b4af2e7f921ee6adb2a70f62f6c3846139d0.tar.bz2 |
avx512erintrin.h (_mm_rcp28_round_sd): Swap operands.
gcc/
* config/i386/avx512erintrin.h (_mm_rcp28_round_sd): Swap operands.
(_mm_rcp28_round_ss): Ditto.
(_mm_rsqrt28_round_sd): Ditto.
(_mm_rsqrt28_round_ss): Ditto.
* config/i386/avx512erintrin.h (_mm_rcp14_round_sd): Ditto.
(_mm_rcp14_round_ss): Ditto.
(_mm_rsqrt14_round_sd): Ditto.
(_mm_rsqrt14_round_ss): Ditto.
* config/i386/sse.md (rsqrt14<mode>): Put nonimmediate operand as
the first input operand, get rid of match_dup.
(avx512er_exp2<mode><mask_name><round_saeonly_name>): Set type
attribute to sse.
(<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>):
Ditto.
(avx512er_vmrcp28<mode><round_saeonly_name>): Put nonimmediate
operand as the first input operand, set type attribute.
(<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>):
Set type attribute.
(avx512er_vmrsqrt28<mode><round_saeonly_name>): Put nonimmediate
operand as the first input operand, set type attribute.
testsuite/gcc/
* gcc.target/i386/avx512er-vrcp28sd-2.c: Distinguish src1 and src2.
* gcc.target/i386/avx512er-vrcp28ss-2.c: Call correct intrinsic.
* gcc.target/i386/avx512er-vrsqrt28sd-2.c: Distinguish src1 and src2.
* gcc.target/i386/avx512er-vrsqrt28ss-2.c: Ditto.
* gcc.target/i386/avx512f-vrcp14sd-2.c: Fix reference calculation.
* gcc.target/i386/avx512f-vrcp14ss-2.c: Fix reference calculation.
Co-Authored-By: Kirill Yukhin <kirill.yukhin@intel.com>
From-SVN: r207932
-rw-r--r-- | gcc/ChangeLog | 24 | ||||
-rw-r--r-- | gcc/config/i386/avx512erintrin.h | 24 | ||||
-rw-r--r-- | gcc/config/i386/avx512fintrin.h | 16 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 16 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c | 8 |
11 files changed, 94 insertions, 52 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8f84dbd..6748c0f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,27 @@ +2014-02-20 Ilya Tocar <ilya.tocar@intel.com> + Kirill Yukhin <kirill.yukhin@intel.com> + + * config/i386/avx512erintrin.h (_mm_rcp28_round_sd): Swap operands. + (_mm_rcp28_round_ss): Ditto. + (_mm_rsqrt28_round_sd): Ditto. + (_mm_rsqrt28_round_ss): Ditto. + * config/i386/avx512erintrin.h (_mm_rcp14_round_sd): Ditto. + (_mm_rcp14_round_ss): Ditto. + (_mm_rsqrt14_round_sd): Ditto. + (_mm_rsqrt14_round_ss): Ditto. + * config/i386/sse.md (rsqrt14<mode>): Put nonimmediate operand as + the first input operand, get rid of match_dup. + (avx512er_exp2<mode><mask_name><round_saeonly_name>): Set type + attribute to sse. + (<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>): + Ditto. + (avx512er_vmrcp28<mode><round_saeonly_name>): Put nonimmediate + operand as the first input operand, set type attribute. + (<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>): + Set type attribute. + (avx512er_vmrsqrt28<mode><round_saeonly_name>): Put nonimmediate + operand as the first input operand, set type attribute. + 2014-02-19 Bill Schmidt <wschmidt@linux.vnet.ibm.com> * config/rs6000/rs6000.c (vspltis_constant): Fix most significant diff --git a/gcc/config/i386/avx512erintrin.h b/gcc/config/i386/avx512erintrin.h index 6fe05bc..f6870a5 100644 --- a/gcc/config/i386/avx512erintrin.h +++ b/gcc/config/i386/avx512erintrin.h @@ -163,8 +163,8 @@ extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R) { - return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __A, - (__v2df) __B, + return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B, + (__v2df) __A, __R); } @@ -172,8 +172,8 @@ extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R) { - return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __A, - (__v4sf) __B, + return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B, + (__v4sf) __A, __R); } @@ -237,8 +237,8 @@ extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R) { - return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __A, - (__v2df) __B, + return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B, + (__v2df) __A, __R); } @@ -246,8 +246,8 @@ extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R) { - return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __A, - (__v4sf) __B, + return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B, + (__v4sf) __A, __R); } @@ -375,16 +375,16 @@ _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R) _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION) #define _mm_rcp28_sd(A, B) \ - __builtin_ia32_rcp28sd_round(A, B, _MM_FROUND_CUR_DIRECTION) + __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION) #define _mm_rcp28_ss(A, B) \ - __builtin_ia32_rcp28ss_round(A, B, _MM_FROUND_CUR_DIRECTION) + __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION) #define _mm_rsqrt28_sd(A, B) \ - __builtin_ia32_rsqrt28sd_round(A, B, _MM_FROUND_CUR_DIRECTION) + __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION) #define _mm_rsqrt28_ss(A, B) \ - __builtin_ia32_rsqrt28ss_round(A, B, _MM_FROUND_CUR_DIRECTION) + __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION) #ifdef __DISABLE_AVX512ER__ #undef __DISABLE_AVX512ER__ diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index b3a4f3a..65a49c5 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -1470,16 +1470,16 @@ extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rcp14_sd (__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A, - (__v2df) __B); + return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B, + (__v2df) __A); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rcp14_ss (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A, - (__v4sf) __B); + return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B, + (__v4sf) __A); } extern __inline __m512d @@ -1544,16 +1544,16 @@ extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rsqrt14_sd (__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A, - (__v2df) __B); + return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B, + (__v2df) __A); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rsqrt14_ss (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A, - (__v4sf) __B); + return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B, + (__v4sf) __A); } #ifdef __OPTIMIZE__ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5595767..487c917 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1551,13 +1551,12 @@ [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 - [(match_operand:VF_128 1 "register_operand" "v") - (match_operand:VF_128 2 "nonimmediate_operand" "vm")] + [(match_operand:VF_128 1 "nonimmediate_operand" "vm")] UNSPEC_RSQRT14) - (match_dup 1) + (match_operand:VF_128 2 "register_operand" "v") (const_int 1)))] "TARGET_AVX512F" - "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) @@ -12804,6 +12803,7 @@ "TARGET_AVX512ER" "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "prefix" "evex") + (set_attr "type" "sse") (set_attr "mode" "<MODE>")]) (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>" @@ -12814,6 +12814,7 @@ "TARGET_AVX512ER" "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "prefix" "evex") + (set_attr "type" "sse") (set_attr "mode" "<MODE>")]) (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>" @@ -12825,9 +12826,10 @@ (match_operand:VF_128 2 "register_operand" "v") (const_int 1)))] "TARGET_AVX512ER" - "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}" + "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}" [(set_attr "length_immediate" "1") (set_attr "prefix" "evex") + (set_attr "type" "sse") (set_attr "mode" "<MODE>")]) (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>" @@ -12838,6 +12840,7 @@ "TARGET_AVX512ER" "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "prefix" "evex") + (set_attr "type" "sse") (set_attr "mode" "<MODE>")]) (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>" @@ -12849,8 +12852,9 @@ (match_operand:VF_128 2 "register_operand" "v") (const_int 1)))] "TARGET_AVX512ER" - "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}" + "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}" [(set_attr "length_immediate" "1") + (set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index fd796d7..5aad2d5 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,13 @@ +2014-02-20 Ilya Tocar <ilya.tocar@intel.com> + Kirill Yukhin <kirill.yukhin@intel.com> + + * gcc.target/i386/avx512er-vrcp28sd-2.c: Distinguish src1 and src2. + * gcc.target/i386/avx512er-vrcp28ss-2.c: Call correct intrinsic. + * gcc.target/i386/avx512er-vrsqrt28sd-2.c: Distinguish src1 and src2. + * gcc.target/i386/avx512er-vrsqrt28ss-2.c: Ditto. + * gcc.target/i386/avx512f-vrcp14sd-2.c: Fix reference calculation. + * gcc.target/i386/avx512f-vrcp14ss-2.c: Fix reference calculation. + 2014-02-19 Jakub Jelinek <jakub@redhat.com> PR c/37743 diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c index d30f088..889f990 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c @@ -10,19 +10,20 @@ void static avx512er_test (void) { - union128d src, res; + union128d src1, src2, res; double res_ref[2]; int i; for (i = 0; i < 2; i++) { - src.a[i] = 179.345 - 6.5645 * i; - res_ref[i] = src.a[i]; + src1.a[i] = 179.345 - 6.5645 * i; + src2.a[i] = 204179.345 + 6.5645 * i; + res_ref[i] = src1.a[i]; } - res_ref[0] = 1.0 / src.a[0]; + res_ref[0] = 1.0 / src2.a[0]; - res.x = _mm_rcp28_round_sd (src.x, src.x, _MM_FROUND_NO_EXC); + res.x = _mm_rcp28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC); if (checkVd (res.a, res_ref, 2)) abort (); diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c index 499a977..3280879 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c @@ -10,19 +10,20 @@ void static avx512er_test (void) { - union128 src, res; + union128 src1, src2, res; float res_ref[4]; int i; for (i = 0; i < 4; i++) { - src.a[i] = 179.345 - 6.5645 * i; - res_ref[i] = src.a[i]; + src1.a[i] = 179.345 - 6.5645 * i; + src2.a[i] = 179345.006 + 6.5645 * i; + res_ref[i] = src1.a[i]; } - res_ref[0] = 1.0 / src.a[0]; + res_ref[0] = 1.0 / src2.a[0]; - res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC); + res.x = _mm_rcp28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC); if (checkVf (res.a, res_ref, 4)) abort (); diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c index 1537a59..bd217e8 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c @@ -10,19 +10,20 @@ void static avx512er_test (void) { - union128d src, res; + union128d src1, src2, res; double res_ref[2]; int i; for (i = 0; i < 2; i++) { - src.a[i] = 179.345 - 6.5645 * i; - res_ref[i] = src.a[i]; + src1.a[i] = 179.345 - 6.5645 * i; + src2.a[i] = 45 - 6.5645 * i; + res_ref[i] = src1.a[i]; } - res_ref[0] = 1.0 / sqrt (src.a[0]); + res_ref[0] = 1.0 / sqrt (src2.a[0]); - res.x = _mm_rsqrt28_round_sd (src.x, src.x, _MM_FROUND_NO_EXC); + res.x = _mm_rsqrt28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC); if (checkVd (res.a, res_ref, 2)) abort (); diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c index f88422e..f7bfff5 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c @@ -10,19 +10,20 @@ void static avx512er_test (void) { - union128 src, res; + union128 src1, src2, res; float res_ref[4]; int i; for (i = 0; i < 4; i++) { - src.a[i] = 179.345 - 6.5645 * i; - res_ref[i] = src.a[i]; + src1.a[i] = 179.345 - 6.5645 * i; + src2.a[i] = 179221345 + 6.5645 * i; + res_ref[i] = src1.a[i]; } - res_ref[0] = 1.0 / sqrt (src.a[0]); + res_ref[0] = 1.0 / sqrt (src2.a[0]); - res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC); + res.x = _mm_rsqrt28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC); if (checkVf (res.a, res_ref, 4)) abort (); diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c index 0c9211a..f944600 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c @@ -8,8 +8,8 @@ static void compute_vrcp14sd (double *s1, double *s2, double *r) { - r[0] = 1.0 / s1[0]; - r[1] = s2[1]; + r[0] = 1.0 / s2[0]; + r[1] = s1[1]; } static void diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c index 3344dad..7aca591 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c @@ -8,10 +8,10 @@ static void compute_vrcp14ss (float *s1, float *s2, float *r) { - r[0] = 1.0 / s1[0]; - r[1] = s2[1]; - r[2] = s2[2]; - r[3] = s2[3]; + r[0] = 1.0 / s2[0]; + r[1] = s1[1]; + r[2] = s1[2]; + r[3] = s1[3]; } static void |