diff options
author | Hongyu Wang <hongyu.wang@intel.com> | 2020-11-11 09:41:13 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2020-12-03 10:02:14 +0800 |
commit | 81d590760c31e11e3a09135f4e182aea232035f2 (patch) | |
tree | 57f48ec126c1f100e1a9a314df215499a497a6ed | |
parent | c05ece92c6153289fd6055e31e791e59b8ac4121 (diff) | |
download | gcc-81d590760c31e11e3a09135f4e182aea232035f2.zip gcc-81d590760c31e11e3a09135f4e182aea232035f2.tar.gz gcc-81d590760c31e11e3a09135f4e182aea232035f2.tar.bz2 |
Add popcount<mode> expander to enable popcount auto vectorization under AVX512BITALG/AVX512POPCNTDQ target.
gcc/ChangeLog
PR target/97770
* config/i386/sse.md (popcount<mode>2): New expander
for SI/DI vector modes.
(popcount<mode>2): Likewise for QI/HI vector modes.
gcc/testsuite/ChangeLog
PR target/97770
* gcc.target/i386/avx512bitalg-pr97770-1.c: New test.
* gcc.target/i386/avx512vpopcntdq-pr97770-1.c: Likewise.
* gcc.target/i386/avx512vpopcntdq-pr97770-2.c: Likewise.
* gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c: Likewise.
-rw-r--r-- | gcc/config/i386/sse.md | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c | 60 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c | 63 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-2.c | 39 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c | 14 |
5 files changed, 188 insertions, 0 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4aad462..559f842 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -22702,6 +22702,12 @@ (set_attr ("prefix") ("evex")) (set_attr ("mode") ("TI"))]) +(define_expand "popcount<mode>2" + [(set (match_operand:VI48_AVX512VL 0 "register_operand") + (popcount:VI48_AVX512VL + (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))] + "TARGET_AVX512VPOPCNTDQ") + (define_insn "vpopcount<mode><mask_name>" [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") (popcount:VI48_AVX512VL @@ -22746,6 +22752,12 @@ "TARGET_SSE && TARGET_64BIT" "jmp\t%P1") +(define_expand "popcount<mode>2" + [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") + (popcount:VI12_AVX512VL + (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))] + "TARGET_AVX512BITALG") + (define_insn "vpopcount<mode><mask_name>" [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") (popcount:VI12_AVX512VL diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c new file mode 100644 index 0000000..c83a477 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c @@ -0,0 +1,60 @@ +/* PR target/97770 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bitalg -mavx512vl -mprefer-vector-width=512" } */ +/* Add xfail since no IFN for QI/HImode popcount */ +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */ +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */ +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */ +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */ +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */ +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */ + +#include <immintrin.h> + +void +__attribute__ ((noipa, optimize("-O3"))) +popcountb_128 (char * __restrict dest, char* src) +{ + for (int i = 0; i != 16; i++) + dest[i] = __builtin_popcount (src[i]); +} + +void +__attribute__ ((noipa, optimize("-O3"))) +popcountw_128 (short* __restrict dest, short* src) +{ + for (int i = 0; i != 8; i++) + dest[i] = __builtin_popcount (src[i]); +} + +void +__attribute__ ((noipa, optimize("-O3"))) +popcountb_256 (char * __restrict dest, char* src) +{ + for (int i = 0; i != 32; i++) + dest[i] = __builtin_popcount (src[i]); +} + +void +__attribute__ ((noipa, optimize("-O3"))) +popcountw_256 (short* __restrict dest, short* src) +{ + for (int i = 0; i != 16; i++) + dest[i] = __builtin_popcount (src[i]); +} + +void +__attribute__ ((noipa, optimize("-O3"))) +popcountb_512 (char * __restrict dest, char* src) +{ + for (int i = 0; i != 64; i++) + dest[i] = __builtin_popcount (src[i]); +} + +void +__attribute__ ((noipa, optimize("-O3"))) +popcountw_512 (short* __restrict dest, short* src) +{ + for (int i = 0; i != 32; i++) + dest[i] = __builtin_popcount (src[i]); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c new file mode 100644 index 0000000..63bb00d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c @@ -0,0 +1,63 @@ +/* PR target/97770 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl -mprefer-vector-width=512" } */ +/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */ +/* Add xfail since current vectorizor cannot generate expected code for DImode popcount */ +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 { xfail *-*-* } } } */ +#ifndef AVX512VPOPCNTQ_H_INCLUDED +#define AVX512VPOPCNTQ_H_INCLUDED + +#include <immintrin.h> + +void +__attribute__ ((noipa, optimize("-O3"))) +popcountd_128 (int* __restrict dest, int* src) +{ + for (int i = 0; i != 4; i++) + dest[i] = __builtin_popcount (src[i]); +} + +void +__attribute__ ((noipa, optimize("-O3"))) +popcountq_128 (long long* __restrict dest, long long* src) +{ + for (int i = 0; i != 2; i++) + dest[i] = __builtin_popcountll (src[i]); +} + +void +__attribute__ ((noipa, optimize("-O3"))) +popcountd_256 (int* __restrict dest, int* src) +{ + for (int i = 0; i != 8; i++) + dest[i] = __builtin_popcount (src[i]); +} + +void +__attribute__ ((noipa, optimize("-O3"))) +popcountq_256 (long long* __restrict dest, long long* src) +{ + for (int i = 0; i != 4; i++) + dest[i] = __builtin_popcountll (src[i]); +} + +void +__attribute__ ((noipa, optimize("-O3"))) +popcountd_512 (int* __restrict dest, int* src) +{ + for (int i = 0; i != 16; i++) + dest[i] = __builtin_popcount (src[i]); +} + +void +__attribute__ ((noipa, optimize("-O3"))) +popcountq_512 (long long* __restrict dest, long long* src) +{ + for (int i = 0; i != 8; i++) + dest[i] = __builtin_popcountll (src[i]); +} +#endif diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-2.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-2.c new file mode 100644 index 0000000..339dc29 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-2.c @@ -0,0 +1,39 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vpopcntdq" } */ + +#define AVX512VPOPCNTDQ + +#include "avx512f-helper.h" +#include "avx512vpopcntdq-pr97770-1.c" + +#define SIZE_D AVX512F_LEN / 32 +#define SIZE_Q AVX512F_LEN / 64 + + +#define RTEST(TYPE, LEN, SIZE, MODE) \ + do \ + { \ + TYPE res[SIZE], src[SIZE], res_ref[SIZE], v; \ + int i, j, ret; \ + for (i = 0; i < SIZE; i++) \ + { \ + v = src[i] = i * 2 + 3; \ + ret = 0; \ + for (j = 0; j < sizeof(v) * 8; j++) \ + if ((v & ((TYPE)1 << (TYPE) j))) \ + ret++; \ + res_ref[i] = ret; \ + } \ + EVAL(popcount, MODE, LEN) (res, src); \ + for (i = 0; i < SIZE; i++) \ + if (res[i] != res_ref[i]) \ + abort (); \ + } \ + while (0) + +void +TEST (void) +{ + RTEST (long long, AVX512F_LEN, SIZE_Q, q_); + RTEST (int, AVX512F_LEN, SIZE_D, d_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c new file mode 100644 index 0000000..7a34f15 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c @@ -0,0 +1,14 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -mavx512vpopcntdq -mavx512vl" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512vpopcntdq-pr97770-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512vpopcntdq-pr97770-2.c" |