aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: xuli <xuli1@eswincomputing.com> 2023-09-22 01:25:39 +0000
committer: xuli <xuli1@eswincomputing.com> 2023-09-22 04:19:32 +0000
commit: 0ed05db7cee8f92604b5d7761713b7a7161e0db0 (patch)
tree: ab8ddc38f4340450c0c105eaa7c01690df867669
parent: e446ed29f6c92dd677ec5792aada5343c9533c2c (diff)
download: gcc-0ed05db7cee8f92604b5d7761713b7a7161e0db0.zip
gcc-0ed05db7cee8f92604b5d7761713b7a7161e0db0.tar.gz
gcc-0ed05db7cee8f92604b5d7761713b7a7161e0db0.tar.bz2
RISC-V: Optimization of vrgather.vv into vrgatherei16.vv [PR111451]
Consider the following case:

typedef int32_t vnx32si __attribute__ ((vector_size (128)));

__attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2, \
 TYPE *out) \
{ \
 TYPE v \
 = __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
 *(TYPE *) out = v; \
}

T (vnx32si, 32) \

TEST_ALL (PERMUTE)

Before this patch:

 li a4,31
 vsetvli a5,zero,e32,m8,ta,ma
 vl8re32.v v24,0(a0)
 vid.v v8
 vrsub.vx v8,v8,a4
 vrgather.vv v16,v24,v8
 vs8r.v v16,0(a2)
 ret

The index vector register "v8" occupies 8 registers. We should optimize this into vrgatherei16.vv, which uses int16 index elements.

After this patch:

 vsetvli a5,zero,e16,m4,ta,ma
 li a4,31
 vid.v v4
 vl8re32.v v16,0(a0)
 vrsub.vx v4,v4,a4
 vsetvli zero,zero,e32,m8,ta,ma
 vrgatherei16.vv v8,v16,v4
 vs8r.v v8,0(a2)
 ret

With vrgatherei16.vv, the index vector (now "v4") occupies 4 registers instead of 8, which lowers register consumption and register pressure.

PR target/111451

gcc/ChangeLog:

 * config/riscv/riscv-v.cc (emit_vlmax_gather_insn): Optimization of vrgather.vv into vrgatherei16.vv.

gcc/testsuite/ChangeLog:

 * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c: Adjust case.
 * gcc.target/riscv/rvv/autovec/vls/perm-4.c: Ditto.
-rw-r--r-- gcc/config/riscv/riscv-v.cc 18
-rw-r--r-- gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c 3
-rw-r--r-- gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c 3
3 files changed, 22 insertions, 2 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e826621..8379491 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -788,6 +788,24 @@ emit_vlmax_gather_insn (rtx target, rtx op, rtx sel)
icode = code_for_pred_gather_scalar (data_mode);
sel = elt;
}
+ else if (CONST_VECTOR_P (sel)
+ && GET_MODE_BITSIZE (GET_MODE_INNER (sel_mode)) > 16
+ && riscv_get_v_regno_alignment (data_mode) > 1)
+ {
+ /* If the inner mode of data is not QI or HI and data_lmul > 1,
+ emitting vrgatherei16.vv instruction will lower register
+ pressure.
+ data_mode sel_mode ei16
+ RVVM1QI RVVM1QI RVVM2HI not needed
+ RVVM2QI RVVM2QI RVVM4HI not needed
+ RVVM2HI RVVM2HI RVVM2HI not needed
+ RVVM2SI RVVM2SI RVVM1HI need
+ RVVM4SI RVVM4SI RVVM2HI need
+ RVVM8DI RVVM8DI RVVM2HI need */
+ PUT_MODE (sel, get_vector_mode (HImode,
+ GET_MODE_NUNITS (data_mode)).require ());
+ icode = code_for_pred_gatherei16 (data_mode);
+ }
else
icode = code_for_pred_gather (data_mode);
rtx ops[] = {target, op, sel};
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c
index 9df69a0..7ab3104 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c
@@ -55,6 +55,7 @@
TEST_ALL (PERMUTE)
-/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 31 } } */
+/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 19 } } */
+/* { dg-final { scan-assembler-times {vrgatherei16\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 12 } } */
/* { dg-final { scan-assembler-times {vrsub\.vi} 24 } } */
/* { dg-final { scan-assembler-times {vrsub\.vx} 7 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c
index 46cad8e..4d6862c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c
@@ -3,6 +3,7 @@
#include "../vls-vlmax/perm-4.c"
-/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 31 } } */
+/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 19 } } */
+/* { dg-final { scan-assembler-times {vrgatherei16\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 12 } } */
/* { dg-final { scan-assembler-times {vrsub\.vi} 24 } } */
/* { dg-final { scan-assembler-times {vrsub\.vx} 7 } } */