about | summary | refs | log | tree | commit | diff
path: root/libgcc
diff options
context:
space:
mode:
author: Juzhe-Zhong <juzhe.zhong@rivai.ai> 2023-05-15 14:00:59 +0800
committer: Pan Li <pan2.li@intel.com> 2023-05-15 14:06:46 +0800
commit: f2afe68a1755fe627ae7dca9cc6c3a72bb8222f9 (patch)
tree: c45b2e4c1a016a685f0b96966fc08d66bbf348cc /libgcc
parent: b4c6faa5375c6ea55f6a7f6d188ee404bf9922de (diff)
download: gcc-f2afe68a1755fe627ae7dca9cc6c3a72bb8222f9.zip
gcc-f2afe68a1755fe627ae7dca9cc6c3a72bb8222f9.tar.gz
gcc-f2afe68a1755fe627ae7dca9cc6c3a72bb8222f9.tar.bz2
RISC-V: Support TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT to optimize codegen of both VLA && VLS auto-vectorization
This patch optimizes both RVV VLA && VLS vectorization.

Consider the following case:

void
__attribute__((noinline, noclone))
f (int * __restrict dst, int * __restrict op1, int * __restrict op2, int count)
{
  for (int i = 0; i < count; ++i)
    dst[i] = op1[i] + op2[i];
}

VLA:

Before this patch:
        ble a3,zero,.L1
        srli a4,a1,2
        negw a4,a4
        andi a5,a4,3
        sext.w a3,a3
        beq a5,zero,.L3
        lw a7,0(a1)
        lw a6,0(a2)
        andi a4,a4,2
        addw a6,a6,a7
        sw a6,0(a0)
        beq a4,zero,.L3
        lw a7,4(a1)
        lw a4,4(a2)
        li a6,3
        addw a4,a4,a7
        sw a4,4(a0)
        bne a5,a6,.L3
        lw a6,8(a2)
        lw a4,8(a1)
        addw a4,a4,a6
        sw a4,8(a0)
.L3:
        subw a3,a3,a5
        slli a4,a3,32
        csrr a6,vlenb
        srli a4,a4,32
        srli a6,a6,2
        slli a3,a5,2
        mv a5,a4
        bgtu a4,a6,.L17
.L5:
        csrr a6,vlenb
        add a1,a1,a3
        add a2,a2,a3
        add a0,a0,a3
        srli a7,a6,2
        li a3,0
.L8:
        vsetvli zero,a5,e32,m1,ta,ma
        vle32.v v1,0(a1)
        vle32.v v2,0(a2)
        vsetvli t1,zero,e32,m1,ta,ma
        add a3,a3,a7
        vadd.vv v1,v1,v2
        vsetvli zero,a5,e32,m1,ta,ma
        vse32.v v1,0(a0)
        mv a5,a4
        bleu a4,a3,.L6
        mv a5,a3
.L6:
        sub a5,a4,a5
        bleu a5,a7,.L7
        mv a5,a7
.L7:
        add a1,a1,a6
        add a2,a2,a6
        add a0,a0,a6
        bne a5,zero,.L8
.L1:
        ret
.L17:
        mv a5,a6
        j .L5

After this patch:
f:
        ble a3,zero,.L1
        csrr a4,vlenb
        srli a4,a4,2
        mv a5,a3
        bgtu a3,a4,.L9
.L3:
        csrr a6,vlenb
        li a4,0
        srli a7,a6,2
.L6:
        vsetvli zero,a5,e32,m1,ta,ma
        vle32.v v2,0(a1)
        vle32.v v1,0(a2)
        vsetvli t1,zero,e32,m1,ta,ma
        add a4,a4,a7
        vadd.vv v1,v1,v2
        vsetvli zero,a5,e32,m1,ta,ma
        vse32.v v1,0(a0)
        mv a5,a3
        bleu a3,a4,.L4
        mv a5,a4
.L4:
        sub a5,a3,a5
        bleu a5,a7,.L5
        mv a5,a7
.L5:
        add a0,a0,a6
        add a2,a2,a6
        add a1,a1,a6
        bne a5,zero,.L6
.L1:
        ret
.L9:
        mv a5,a4
        j .L3

VLS:

Before this patch:
f3:
        ble a3,zero,.L1
        srli a5,a1,2
        negw a5,a5
        andi a4,a5,3
        sext.w a3,a3
        beq a4,zero,.L3
        lw a7,0(a1)
        lw a6,0(a2)
        andi a5,a5,2
        addw a6,a6,a7
        sw a6,0(a0)
        beq a5,zero,.L3
        lw a7,4(a1)
        lw a5,4(a2)
        li a6,3
        addw a5,a5,a7
        sw a5,4(a0)
        bne a4,a6,.L3
        lw a6,8(a2)
        lw a5,8(a1)
        addw a5,a5,a6
        sw a5,8(a0)
.L3:
        subw a3,a3,a4
        slli a6,a4,2
        slli a5,a3,32
        srli a5,a5,32
        add a1,a1,a6
        add a2,a2,a6
        add a0,a0,a6
        li a3,4
.L6:
        mv a4,a5
        bleu a5,a3,.L5
        li a4,4
.L5:
        vsetvli zero,a4,e32,m1,ta,ma
        vle32.v v1,0(a1)
        vle32.v v2,0(a2)
        vsetivli zero,4,e32,m1,ta,ma
        sub a5,a5,a4
        vadd.vv v1,v1,v2
        vsetvli zero,a4,e32,m1,ta,ma
        vse32.v v1,0(a0)
        addi a1,a1,16
        addi a2,a2,16
        addi a0,a0,16
        bne a5,zero,.L6
.L1:
        ret

After this patch:
f3:
        ble a3,zero,.L1
        li a4,4
.L4:
        mv a5,a3
        bleu a3,a4,.L3
        li a5,4
.L3:
        vsetvli zero,a5,e32,m1,ta,ma
        vle32.v v2,0(a1)
        vle32.v v1,0(a2)
        vsetivli zero,4,e32,m1,ta,ma
        sub a3,a3,a5
        vadd.vv v1,v1,v2
        vsetvli zero,a5,e32,m1,ta,ma
        vse32.v v1,0(a0)
        addi a2,a2,16
        addi a0,a0,16
        addi a1,a1,16
        bne a3,zero,.L4
.L1:
        ret

Signed-off-by: Juzhe-Zhong <juzhe.zhong@rivai.ai>

gcc/ChangeLog:

        * config/riscv/riscv.cc (riscv_vectorize_preferred_vector_alignment): New function.
        (TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT): New target hook.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/binop/shift-rv32gcv.c: Adapt testcase.
        * gcc.target/riscv/rvv/autovec/align-1.c: New test.
        * gcc.target/riscv/rvv/autovec/align-2.c: New test.
Diffstat (limited to 'libgcc')
0 files changed, 0 insertions, 0 deletions