diff options
author | Levy Hsu <admin@levyhsu.com> | 2024-09-11 14:19:02 +0930 |
---|---|---|
committer | Levy Hsu <admin@levyhsu.com> | 2024-09-26 02:37:34 +0000 |
commit | 85910e650a61de6da37e3d67a7ac208904dd3c0d (patch) | |
tree | 018b32600066bd90076e03bd5bad978672986eed /gcc | |
parent | 78eef8919e2f2973ed7750ba66f5726e70614d07 (diff) | |
download | gcc-85910e650a61de6da37e3d67a7ac208904dd3c0d.zip gcc-85910e650a61de6da37e3d67a7ac208904dd3c0d.tar.gz gcc-85910e650a61de6da37e3d67a7ac208904dd3c0d.tar.bz2 |
x86: Extend AVX512 Vectorization for Popcount in Various Modes
This patch enables vectorization of the popcount operation for V2QI, V4QI,
V8QI, V2HI, V4HI, and V2SI modes.
gcc/ChangeLog:
* config/i386/mmx.md:
(VI1_16_32_64): New mode iterator for 8-byte, 4-byte, and 2-byte QImode.
(popcount<mode>2): New pattern for popcount of V2QI/V4QI/V8QI mode.
(popcount<mode>2): New pattern for popcount of V2HI/V4HI mode.
(popcountv2si2): New pattern for popcount of V2SI mode.
gcc/testsuite/ChangeLog:
* gcc.target/i386/part-vect-popcount-1.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/mmx.md | 24 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c | 49 |
2 files changed, 73 insertions, 0 deletions
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e88a06c..ca768b9 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -70,6 +70,9 @@
 ;; 8-byte and 4-byte HImode vector modes
 (define_mode_iterator VI2_32_64 [(V4HI "TARGET_MMX_WITH_SSE") V2HI])
 
+;; 8-byte, 4-byte and 2-byte QImode vector modes
+(define_mode_iterator VI1_16_32_64 [(V8QI "TARGET_MMX_WITH_SSE") V4QI V2QI])
+
 ;; 4-byte and 2-byte integer vector modes
 (define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
 
@@ -6803,3 +6806,24 @@
   [(set_attr "type" "mmx")
    (set_attr "modrm" "0")
    (set_attr "memory" "none")])
+
+(define_insn "popcount<mode>2"
+  [(set (match_operand:VI1_16_32_64 0 "register_operand" "=v")
+        (popcount:VI1_16_32_64
+          (match_operand:VI1_16_32_64 1 "register_operand" "v")))]
+  "TARGET_AVX512VL && TARGET_AVX512BITALG"
+  "vpopcntb\t{%1, %0|%0, %1}")
+
+(define_insn "popcount<mode>2"
+  [(set (match_operand:VI2_32_64 0 "register_operand" "=v")
+        (popcount:VI2_32_64
+          (match_operand:VI2_32_64 1 "register_operand" "v")))]
+  "TARGET_AVX512VL && TARGET_AVX512BITALG"
+  "vpopcntw\t{%1, %0|%0, %1}")
+
+(define_insn "popcountv2si2"
+  [(set (match_operand:V2SI 0 "register_operand" "=v")
+        (popcount:V2SI
+          (match_operand:V2SI 1 "register_operand" "v")))]
+  "TARGET_AVX512VPOPCNTDQ && TARGET_AVX512VL && TARGET_MMX_WITH_SSE"
+  "vpopcntd\t{%1, %0|%0, %1}")
diff --git a/gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c b/gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c
new file mode 100644
index 0000000..a30f6ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vpopcntdq -mavx512bitalg -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpopcntd\[^\n\r\]*xmm\[0-9\]" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[^\n\r\]*xmm\[0-9\]" 3 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[^\n\r\]*xmm\[0-9\]" 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[^\n\r\]*xmm\[0-9\]" 4 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[^\n\r\]*xmm\[0-9\]" 3 { target { ! ia32 } } } } */
+
+void
+foo1 (int* a, int* __restrict b)
+{
+  for (int i = 0; i != 2; i++)
+    a[i] = __builtin_popcount (b[i]);
+}
+
+void
+foo2 (unsigned short* a, unsigned short* __restrict b)
+{
+  for (int i = 0; i != 4; i++)
+    a[i] = __builtin_popcount (b[i]);
+}
+
+void
+foo3 (unsigned short* a, unsigned short* __restrict b)
+{
+  for (int i = 0; i != 2; i++)
+    a[i] = __builtin_popcount (b[i]);
+}
+
+void
+foo4 (unsigned char* a, unsigned char* __restrict b)
+{
+  for (int i = 0; i != 8; i++)
+    a[i] = __builtin_popcount (b[i]);
+}
+
+void
+foo5 (unsigned char* a, unsigned char* __restrict b)
+{
+  for (int i = 0; i != 4; i++)
+    a[i] = __builtin_popcount (b[i]);
+}
+
+void
+foo6 (unsigned char* a, unsigned char* __restrict b)
+{
+  for (int i = 0; i != 2; i++)
+    a[i] = __builtin_popcount (b[i]);
+}