diff options
author | Juzhe-Zhong <juzhe.zhong@rivai.ai> | 2023-11-24 13:04:18 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2023-11-24 13:08:21 +0800 |
commit | af7a422da457aa13df8eb1feb601dffafb76ed7c (patch) | |
tree | 56a6d868bf4fec7517595bb6a806d6b446043b39 | |
parent | cff1fa6625d1273fcfaf473e436ba918262d8afa (diff) | |
download | gcc-af7a422da457aa13df8eb1feb601dffafb76ed7c.zip gcc-af7a422da457aa13df8eb1feb601dffafb76ed7c.tar.gz gcc-af7a422da457aa13df8eb1feb601dffafb76ed7c.tar.bz2 |
RISC-V: Disable BSWAP optimization for NUNITS < 4
While fixing bugs, I noticed a piece of odd code that looks incorrect
and probably makes codegen worse.
/* Reproducer: reverse the two elements of a 2 x int8_t vector.  */
#include <stdint.h>
/* vector_size (2) with a 1-byte element type gives a two-element vector.  */
typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
/* Two shuffle indices, Y-1-X and Y-2-X; MASK_2 (0, 2) expands to 1, 0.  */
#define MASK_2(X, Y) (Y) - 1 - (X), (Y) - 2 - (X)
/* Define permute_<TYPE>, which shuffles values1/values2 with
   MASK_<NUNITS> (0, NUNITS) and stores the result through OUT.
   The function is marked noipa.  */
#define PERMUTE(TYPE, NUNITS) \
__attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2, \
TYPE *out) \
{ \
TYPE v \
= __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
*(TYPE *) out = v; \
}
/* Apply T to every (type, element count) pair under test; only vnx2qi here.  */
#define TEST_ALL(T) \
T (vnx2qi, 2)
/* Instantiates permute_vnx2qi.  */
TEST_ALL (PERMUTE)
Before this patch:
vsetivli zero,2,e8,mf8,ta,ma
vle8.v v1,0(a0)
vsetivli zero,1,e16,mf4,ta,ma
vsrl.vi v2,v1,8
vsll.vi v1,v1,8
vor.vv v1,v2,v1
vsetivli zero,2,e8,mf8,ta,ma
vse8.v v1,0(a2)
ret
After this patch:
vsetivli zero,2,e8,mf8,ta,ma
vle8.v v3,0(a0)
vid.v v1
vrsub.vi v1,v1,1
vrgather.vv v2,v3,v1
vse8.v v2,0(a2)
ret
Committed as it is very obvious on code review.
gcc/ChangeLog:
* config/riscv/riscv-v.cc (shuffle_bswap_pattern): Disable for NUNITS < 4.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c: Adapt test.
* gcc.target/riscv/rvv/autovec/vls/perm-4.c: Ditto.
-rw-r--r-- | gcc/config/riscv/riscv-v.cc | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c | 4 |
3 files changed, 9 insertions, 4 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 72b96d8..acf4097 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3201,6 +3201,11 @@ shuffle_bswap_pattern (struct expand_vec_perm_d *d)
   if (!d->perm.series_p (i, step, diff - i, step))
     return false;
 
+  /* Disable when nunits < 4 since the later generic approach
+     is more profitable on BSWAP. */
+  if (!known_gt (GET_MODE_NUNITS (d->vmode), 2))
+    return false;
+
   if (d->testing_p)
     return true;
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c
index b235ec7..7ab3104 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c
@@ -55,7 +55,7 @@
 
 TEST_ALL (PERMUTE)
 
-/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 18 } } */
+/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 19 } } */
 /* { dg-final { scan-assembler-times {vrgatherei16\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 12 } } */
-/* { dg-final { scan-assembler-times {vrsub\.vi} 23 } } */
+/* { dg-final { scan-assembler-times {vrsub\.vi} 24 } } */
 /* { dg-final { scan-assembler-times {vrsub\.vx} 7 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c
index d2d4938..4d6862c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c
@@ -3,7 +3,7 @@
 
 #include "../vls-vlmax/perm-4.c"
 
-/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 18 } } */
+/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 19 } } */
 /* { dg-final { scan-assembler-times {vrgatherei16\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 12 } } */
-/* { dg-final { scan-assembler-times {vrsub\.vi} 23 } } */
+/* { dg-final { scan-assembler-times {vrsub\.vi} 24 } } */
 /* { dg-final { scan-assembler-times {vrsub\.vx} 7 } } */