aboutsummaryrefslogtreecommitdiff
path: root/libgcc
diff options
context:
space:
mode:
author: Juzhe-Zhong <juzhe.zhong@rivai.ai> 2023-06-01 16:32:12 +0800
committer: Pan Li <pan2.li@intel.com> 2023-06-03 09:42:44 +0800
commit: 2e3401bd71b59ca0e03f051c5db286c32299b940 (patch)
tree: 57c8c8b278cfe613b46c250f6b504bfc69a82d62 /libgcc
parent: 829d597548549709fcbfdba03ad6374174d11ec6 (diff)
downloadgcc-2e3401bd71b59ca0e03f051c5db286c32299b940.zip
gcc-2e3401bd71b59ca0e03f051c5db286c32299b940.tar.gz
gcc-2e3401bd71b59ca0e03f051c5db286c32299b940.tar.bz2
RISC-V: Add pseudo vwmul.wv pattern to enhance vwmul.vv instruction optimizations
This patch enhances the vwmul.vv combine optimizations.

Consider the following code:

void
vwadd_int16_t_int8_t (int16_t *__restrict dst, int16_t *__restrict dst2,
                      int16_t *__restrict dst3, int16_t *__restrict dst4,
                      int8_t *__restrict a, int8_t *__restrict b,
                      int8_t *__restrict a2, int8_t *__restrict b2, int n)
{
  for (int i = 0; i < n; i++)
    {
      dst[i] = (int16_t) a[i] * (int16_t) b[i];
      dst2[i] = (int16_t) a2[i] * (int16_t) b[i];
      dst3[i] = (int16_t) a2[i] * (int16_t) a[i];
      dst4[i] = (int16_t) a[i] * (int16_t) b2[i];
    }
}

In such a complicated case, an operand is not used only once; it is used by multiple statements. The GCC combine optimization will iterate over the combinations of the operands.

Also, we add another pattern of vwmulsu.vv to enhance the vwmulsu.vv optimization. Currently, we have the format:

  (mult: (sign_extend) (zero_extend))

in vector.md for intrinsic calls. Now, we add a new vwmulsu.ww with this format:

  (mult: (zero_extend) (sign_extend))

to handle the following case (code that mixes signed and unsigned widening multiplication):

void
vwadd_int16_t_int8_t (int16_t *__restrict dst, int16_t *__restrict dst2,
                      int16_t *__restrict dst3, int16_t *__restrict dst4,
                      int8_t *__restrict a, uint8_t *__restrict b,
                      uint8_t *__restrict a2, int8_t *__restrict b2, int n)
{
  for (int i = 0; i < n; i++)
    {
      dst[i] = (int16_t) a[i] * (int16_t) b[i];
      dst2[i] = (int16_t) a2[i] * (int16_t) b[i];
      dst3[i] = (int16_t) a2[i] * (int16_t) a[i];
      dst4[i] = (int16_t) a[i] * (int16_t) b2[i];
    }
}

Before this patch:
  ...
  vsext.vf2 v6,v1
  add t0,a0,t4
  vzext.vf2 v4,v1
  vmul.vv v2,v4,v6
  add t0,a1,t4
  vzext.vf2 v2,v1
  vmul.vv v4,v2,v4
  add t0,a2,t4
  vmul.vv v2,v2,v6
  add t0,a3,t4
  sub t6,t6,t1
  vsext.vf2 v2,v1
  vmul.vv v2,v2,v6
  ...

After this patch:
  ...
  add t0,a0,t3
  vwmulsu.vv v2,v1,v3
  add t0,a1,t3
  vwmulu.vv v4,v3,v2
  add t0,a2,t3
  vwmulsu.vv v3,v1,v2
  add t0,a3,t3
  sub t4,t4,t1
  vwmul.vv v2,v1,v3
  ...

gcc/ChangeLog:
  * config/riscv/vector.md: Add autovec-opt.md.
  * config/riscv/autovec-opt.md: New file.
gcc/testsuite/ChangeLog:
  * gcc.target/riscv/rvv/autovec/widen/widen-7.c: New test.
  * gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c: New test.
  * gcc.target/riscv/rvv/autovec/widen/widen-complicate-4.c: New test.
  * gcc.target/riscv/rvv/autovec/widen/widen_run-7.c: New test.
Diffstat (limited to 'libgcc')
0 files changed, 0 insertions, 0 deletions