aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>2023-11-24 13:04:18 +0800
committerPan Li <pan2.li@intel.com>2023-11-24 13:08:21 +0800
commitaf7a422da457aa13df8eb1feb601dffafb76ed7c (patch)
tree56a6d868bf4fec7517595bb6a806d6b446043b39
parentcff1fa6625d1273fcfaf473e436ba918262d8afa (diff)
downloadgcc-af7a422da457aa13df8eb1feb601dffafb76ed7c.zip
gcc-af7a422da457aa13df8eb1feb601dffafb76ed7c.tar.gz
gcc-af7a422da457aa13df8eb1feb601dffafb76ed7c.tar.bz2
RISC-V: Disable BSWAP optimization for NUNITS < 4
When fixing bugs, I noticed a piece of odd code that looks incorrect and probably makes codegen worse.

#include <stdint.h>
typedef int8_t vnx2qi __attribute__ ((vector_size (2)));

#define MASK_2(X, Y) (Y) - 1 - (X), (Y) - 2 - (X)

#define PERMUTE(TYPE, NUNITS) \
  __attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2, \
                                               TYPE *out) \
  { \
    TYPE v \
      = __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
    *(TYPE *) out = v; \
  }

#define TEST_ALL(T) \
  T (vnx2qi, 2)

TEST_ALL (PERMUTE)

Before this patch:

    vsetivli zero,2,e8,mf8,ta,ma
    vle8.v v1,0(a0)
    vsetivli zero,1,e16,mf4,ta,ma
    vsrl.vi v2,v1,8
    vsll.vi v1,v1,8
    vor.vv v1,v2,v1
    vsetivli zero,2,e8,mf8,ta,ma
    vse8.v v1,0(a2)
    ret

After this patch:

    vsetivli zero,2,e8,mf8,ta,ma
    vle8.v v3,0(a0)
    vid.v v1
    vrsub.vi v1,v1,1
    vrgather.vv v2,v3,v1
    vse8.v v2,0(a2)
    ret

Committed as it is very obvious on code review.

gcc/ChangeLog:

    * config/riscv/riscv-v.cc (shuffle_bswap_pattern): Disable for NUNITS < 4.

gcc/testsuite/ChangeLog:

    * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c: Adapt test.
    * gcc.target/riscv/rvv/autovec/vls/perm-4.c: Ditto.
-rw-r--r--gcc/config/riscv/riscv-v.cc5
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c4
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c4
3 files changed, 9 insertions, 4 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 72b96d8..acf4097 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3201,6 +3201,11 @@ shuffle_bswap_pattern (struct expand_vec_perm_d *d)
if (!d->perm.series_p (i, step, diff - i, step))
return false;
+ /* Disable when nunits < 4 since the later generic approach
+ is more profitable on BSWAP. */
+ if (!known_gt (GET_MODE_NUNITS (d->vmode), 2))
+ return false;
+
if (d->testing_p)
return true;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c
index b235ec7..7ab3104 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c
@@ -55,7 +55,7 @@
TEST_ALL (PERMUTE)
-/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 18 } } */
+/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 19 } } */
/* { dg-final { scan-assembler-times {vrgatherei16\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 12 } } */
-/* { dg-final { scan-assembler-times {vrsub\.vi} 23 } } */
+/* { dg-final { scan-assembler-times {vrsub\.vi} 24 } } */
/* { dg-final { scan-assembler-times {vrsub\.vx} 7 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c
index d2d4938..4d6862c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c
@@ -3,7 +3,7 @@
#include "../vls-vlmax/perm-4.c"
-/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 18 } } */
+/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 19 } } */
/* { dg-final { scan-assembler-times {vrgatherei16\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 12 } } */
-/* { dg-final { scan-assembler-times {vrsub\.vi} 23 } } */
+/* { dg-final { scan-assembler-times {vrsub\.vi} 24 } } */
/* { dg-final { scan-assembler-times {vrsub\.vx} 7 } } */