diff options
author | Jakub Jelinek <jakub@redhat.com> | 2017-02-07 16:42:42 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2017-02-07 16:42:42 +0100 |
commit | 1f138b7529110a63c10743f3e264522a85af941a (patch) | |
tree | 33df837f5020448f97c791a4035488f34c752fbc /gcc | |
parent | a4cf4b647cd239cc57d88ed82f7243e7efdf43f5 (diff) | |
download | gcc-1f138b7529110a63c10743f3e264522a85af941a.zip gcc-1f138b7529110a63c10743f3e264522a85af941a.tar.gz gcc-1f138b7529110a63c10743f3e264522a85af941a.tar.bz2 |
re PR target/79299 (Operand size mismatch for `vpgatherqd' w/ -O3 -masm=intel -mavx512bw)
PR target/79299
* config/i386/sse.md (xtg_mode, gatherq_mode): New mode attrs.
(*avx512f_gathersi<mode>, *avx512f_gathersi<mode>_2,
*avx512f_gatherdi<mode>, *avx512f_gatherdi<mode>_2): Use them,
fix -masm=intel patterns.
* gcc.target/i386/avx512vl-pr79299-1.c: New test.
* gcc.target/i386/avx512vl-pr79299-2.c: New test.
From-SVN: r245248
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 26 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512vl-pr79299-1.c | 92 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512vl-pr79299-2.c | 294 |
5 files changed, 418 insertions, 8 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 257cd21..18c2083 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2017-02-07 Jakub Jelinek <jakub@redhat.com> + + PR target/79299 + * config/i386/sse.md (xtg_mode, gatherq_mode): New mode attrs. + (*avx512f_gathersi<mode>, *avx512f_gathersi<mode>_2, + *avx512f_gatherdi<mode>, *avx512f_gatherdi<mode>_2): Use them, + fix -masm=intel patterns. + 2017-02-07 Richard Biener <rguenther@suse.de> PR tree-optimization/79256 diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 1654890..c69bcfe 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -811,6 +811,12 @@ [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t") (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")]) +;; Tie mode of assembler operand to mode iterator +(define_mode_attr xtg_mode + [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x") + (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t") + (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")]) + ;; Half mask mode for unpacks (define_mode_attr HALFMASKMODE [(DI "SI") (SI "HI")]) @@ -19034,6 +19040,12 @@ (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) +;; Memory operand override for -masm=intel of the v*gatherq* patterns. +(define_mode_attr gatherq_mode + [(V4SI "q") (V2DI "x") (V4SF "q") (V2DF "x") + (V8SI "x") (V4DI "t") (V8SF "x") (V4DF "t") + (V16SI "t") (V8DI "g") (V16SF "t") (V8DF "g")]) + (define_expand "<avx512>_gathersi<mode>" [(parallel [(set (match_operand:VI48F 0 "register_operand") (unspec:VI48F @@ -19067,7 +19079,7 @@ UNSPEC_GATHER)) (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))] "TARGET_AVX512F" - "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}" + "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %<xtg_mode>6}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) @@ -19086,7 +19098,7 @@ UNSPEC_GATHER)) (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))] "TARGET_AVX512F" - "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}" + "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<xtg_mode>5}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) @@ -19126,9 +19138,7 @@ (clobber (match_scratch:QI 2 "=&Yk"))] "TARGET_AVX512F" { - if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4) - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %t6}"; - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"; + return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %<gatherq_mode>6}"; } [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -19152,11 +19162,11 @@ if (<MODE>mode != <VEC_GATHER_SRCDI>mode) { if (<MODE_SIZE> != 64) - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%t0%{%1%}, %g5}"; + return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %<gatherq_mode>5}"; else - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}"; + return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %t5}"; } - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"; + return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<gatherq_mode>5}"; } [(set_attr "type" "ssemov") (set_attr "prefix" "evex") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 517d562..6cc3818 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2017-02-07 Jakub Jelinek <jakub@redhat.com> + + PR target/79299 + * gcc.target/i386/avx512vl-pr79299-1.c: New test. + * gcc.target/i386/avx512vl-pr79299-2.c: New test. + 2017-02-07 Richard Biener <rguenther@suse.de> * gcc.dg/gimplefe-23.c: New testcase. diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr79299-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr79299-1.c new file mode 100644 index 0000000..04353f59 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr79299-1.c @@ -0,0 +1,92 @@ +/* PR target/79299 */ +/* { dg-do assemble { target avx512vl } } */ +/* { dg-require-effective-target masm_intel } */ +/* { dg-options "-Ofast -mavx512vl -masm=intel" } */ + +#define N 1024 + +unsigned long long a[N]; +unsigned int b[N], c[N], d[N], e[N], f[N]; +unsigned long long g[N], h[N], j[N], k[N]; +float l[N], m[N], n[N], o[N]; +double p[N], q[N], r[N], s[N]; + +void +f1 (void) +{ + int i; + for (i = 0; i < N; i++) + d[i] = c[a[i]]; + for (i = 0; i < N; i++) + e[i] = f[i] ? f[i] : c[a[i]]; +} + +void +f2 (void) +{ + int i; + for (i = 0; i < N; i++) + d[i] = c[b[i]]; + for (i = 0; i < N; i++) + e[i] = f[i] ? f[i] : c[b[i]]; +} + +void +f3 (void) +{ + int i; + for (i = 0; i < N; i++) + h[i] = g[a[i]]; + for (i = 0; i < N; i++) + j[i] = k[i] != 0.0 ? k[i] : g[a[i]]; +} + +void +f4 (void) +{ + int i; + for (i = 0; i < N; i++) + h[i] = g[b[i]]; + for (i = 0; i < N; i++) + j[i] = k[i] != 0.0 ? k[i] : g[b[i]]; +} + +void +f5 (void) +{ + int i; + for (i = 0; i < N; i++) + m[i] = l[a[i]]; + for (i = 0; i < N; i++) + n[i] = o[i] ? o[i] : l[a[i]]; +} + +void +f6 (void) +{ + int i; + for (i = 0; i < N; i++) + m[i] = c[b[i]]; + for (i = 0; i < N; i++) + n[i] = o[i] ? o[i] : c[b[i]]; +} + +void +f7 (void) +{ + int i; + for (i = 0; i < N; i++) + q[i] = p[a[i]]; + for (i = 0; i < N; i++) + r[i] = s[i] != 0.0 ? s[i] : p[a[i]]; +} + +void +f8 (void) +{ + int i; + for (i = 0; i < N; i++) + q[i] = p[b[i]]; + for (i = 0; i < N; i++) + r[i] = s[i] != 0.0 ? s[i] : p[b[i]]; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr79299-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr79299-2.c new file mode 100644 index 0000000..00dca4b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr79299-2.c @@ -0,0 +1,294 @@ +/* PR target/79299 */ +/* { dg-do assemble { target avx512vl } } */ +/* { dg-require-effective-target masm_intel } */ +/* { dg-options "-Ofast -mavx512vl -masm=intel" } */ + +#include <immintrin.h> + +__m512 +f1 (__m512i x, void const *y) +{ + return _mm512_i32gather_ps (x, y, 1); +} + +__m512 +f2 (__m512 x, __mmask16 y, __m512i z, void const *w) +{ + return _mm512_mask_i32gather_ps (x, y, z, w, 1); +} + +__m512d +f3 (__m256i x, void const *y) +{ + return _mm512_i32gather_pd (x, y, 1); +} + +__m512d +f4 (__m512d x, __mmask8 y, __m256i z, void const *w) +{ + return _mm512_mask_i32gather_pd (x, y, z, w, 1); +} + +__m256 +f5 (__m512i x, void const *y) +{ + return _mm512_i64gather_ps (x, y, 1); +} + +__m256 +f6 (__m256 x, __mmask16 y, __m512i z, void const *w) +{ + return _mm512_mask_i64gather_ps (x, y, z, w, 1); +} + +__m512d +f7 (__m512i x, void const *y) +{ + return _mm512_i64gather_pd (x, y, 1); +} + +__m512d +f8 (__m512d x, __mmask8 y, __m512i z, void const *w) +{ + return _mm512_mask_i64gather_pd (x, y, z, w, 1); +} + +__m512i +f9 (__m512i x, void const *y) +{ + return _mm512_i32gather_epi32 (x, y, 1); +} + +__m512i +f10 (__m512i x, __mmask16 y, __m512i z, void const *w) +{ + return _mm512_mask_i32gather_epi32 (x, y, z, w, 1); +} + +__m512i +f11 (__m256i x, void const *y) +{ + return _mm512_i32gather_epi64 (x, y, 1); +} + +__m512i +f12 (__m512i x, __mmask8 y, __m256i z, void const *w) +{ + return _mm512_mask_i32gather_epi64 (x, y, z, w, 1); +} + +__m256i +f13 (__m512i x, void const *y) +{ + return _mm512_i64gather_epi32 (x, y, 1); +} + +__m256i +f14 (__m256i x, __mmask16 y, __m512i z, void const *w) +{ + return _mm512_mask_i64gather_epi32 (x, y, z, w, 1); +} + +__m512i +f15 (__m512i x, void const *y) +{ + return _mm512_i64gather_epi64 (x, y, 1); +} + +__m512i +f16 (__m512i x, __mmask8 y, __m512i z, void const *w) +{ + return _mm512_mask_i64gather_epi64 (x, y, z, w, 1); +} + +__m256 +f17 (__m256 x, __mmask8 y, __m256i z, void const *w) +{ + return _mm256_mmask_i32gather_ps (x, y, z, w, 1); +} + +__m128 +f18 (__m128 x, __mmask8 y, __m128i z, void const *w) +{ + return _mm_mmask_i32gather_ps (x, y, z, w, 1); +} + +__m256d +f19 (__m256d x, __mmask8 y, __m128i z, void const *w) +{ + return _mm256_mmask_i32gather_pd (x, y, z, w, 1); +} + +__m128d +f20 (__m128d x, __mmask8 y, __m128i z, void const *w) +{ + return _mm_mmask_i32gather_pd (x, y, z, w, 1); +} + +__m128 +f21 (__m128 x, __mmask8 y, __m256i z, void const *w) +{ + return _mm256_mmask_i64gather_ps (x, y, z, w, 1); +} + +__m128 +f22 (__m128 x, __mmask8 y, __m128i z, void const *w) +{ + return _mm_mmask_i64gather_ps (x, y, z, w, 1); +} + +__m256d +f23 (__m256d x, __mmask8 y, __m256i z, void const *w) +{ + return _mm256_mmask_i64gather_pd (x, y, z, w, 1); +} + +__m128d +f24 (__m128d x, __mmask8 y, __m128i z, void const *w) +{ + return _mm_mmask_i64gather_pd (x, y, z, w, 1); +} + +__m256i +f25 (__m256i x, __mmask8 y, __m256i z, void const *w) +{ + return _mm256_mmask_i32gather_epi32 (x, y, z, w, 1); +} + +__m128i +f26 (__m128i x, __mmask8 y, __m128i z, void const *w) +{ + return _mm_mmask_i32gather_epi32 (x, y, z, w, 1); +} + +__m256i +f27 (__m256i x, __mmask8 y, __m128i z, void const *w) +{ + return _mm256_mmask_i32gather_epi64 (x, y, z, w, 1); +} + +__m128i +f28 (__m128i x, __mmask8 y, __m128i z, void const *w) +{ + return _mm_mmask_i32gather_epi64 (x, y, z, w, 1); +} + +__m128i +f29 (__m128i x, __mmask8 y, __m256i z, void const *w) +{ + return _mm256_mmask_i64gather_epi32 (x, y, z, w, 1); +} + +__m128i +f30 (__m128i x, __mmask8 y, __m128i z, void const *w) +{ + return _mm_mmask_i64gather_epi32 (x, y, z, w, 1); +} + +__m256i +f31 (__m256i x, __mmask8 y, __m256i z, void const *w) +{ + return _mm256_mmask_i64gather_epi64 (x, y, z, w, 1); +} + +__m128i +f32 (__m128i x, __mmask8 y, __m128i z, void const *w) +{ + return _mm_mmask_i64gather_epi64 (x, y, z, w, 1); +} + +__m256 +f33 (__m256 x, __m256i z, void const *w) +{ + return _mm256_mmask_i32gather_ps (x, -1, z, w, 1); +} + +__m128 +f34 (__m128 x, __m128i z, void const *w) +{ + return _mm_mmask_i32gather_ps (x, -1, z, w, 1); +} + +__m256d +f35 (__m256d x, __m128i z, void const *w) +{ + return _mm256_mmask_i32gather_pd (x, -1, z, w, 1); +} + +__m128d +f36 (__m128d x, __m128i z, void const *w) +{ + return _mm_mmask_i32gather_pd (x, -1, z, w, 1); +} + +__m128 +f37 (__m128 x, __m256i z, void const *w) +{ + return _mm256_mmask_i64gather_ps (x, -1, z, w, 1); +} + +__m128 +f38 (__m128 x, __m128i z, void const *w) +{ + return _mm_mmask_i64gather_ps (x, -1, z, w, 1); +} + +__m256d +f39 (__m256d x, __m256i z, void const *w) +{ + return _mm256_mmask_i64gather_pd (x, -1, z, w, 1); +} + +__m128d +f40 (__m128d x, __m128i z, void const *w) +{ + return _mm_mmask_i64gather_pd (x, -1, z, w, 1); +} + +__m256i +f41 (__m256i x, __m256i z, void const *w) +{ + return _mm256_mmask_i32gather_epi32 (x, -1, z, w, 1); +} + +__m128i +f42 (__m128i x, __m128i z, void const *w) +{ + return _mm_mmask_i32gather_epi32 (x, -1, z, w, 1); +} + +__m256i +f43 (__m256i x, __m128i z, void const *w) +{ + return _mm256_mmask_i32gather_epi64 (x, -1, z, w, 1); +} + +__m128i +f44 (__m128i x, __m128i z, void const *w) +{ + return _mm_mmask_i32gather_epi64 (x, -1, z, w, 1); +} + +__m128i +f45 (__m128i x, __m256i z, void const *w) +{ + return _mm256_mmask_i64gather_epi32 (x, -1, z, w, 1); +} + +__m128i +f46 (__m128i x, __m128i z, void const *w) +{ + return _mm_mmask_i64gather_epi32 (x, -1, z, w, 1); +} + +__m256i +f47 (__m256i x, __m256i z, void const *w) +{ + return _mm256_mmask_i64gather_epi64 (x, -1, z, w, 1); +} + +__m128i +f48 (__m128i x, __m128i z, void const *w) +{ + return _mm_mmask_i64gather_epi64 (x, -1, z, w, 1); +} |