diff options
author | Juzhe-Zhong <juzhe.zhong@rivai.ai> | 2023-06-12 10:41:02 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2023-06-12 20:54:50 +0800 |
commit | 6631fe419c6e47121e54fa3bbcc330dc04efd9a0 (patch) | |
tree | 2919a4108938eb35ab002ec5c3bb0a61403d0c05 | |
parent | 921b841350c4fc298d09f6c5674663e0f4208610 (diff) | |
download | gcc-6631fe419c6e47121e54fa3bbcc330dc04efd9a0.zip gcc-6631fe419c6e47121e54fa3bbcc330dc04efd9a0.tar.gz gcc-6631fe419c6e47121e54fa3bbcc330dc04efd9a0.tar.bz2 |
RISC-V: Add RVV narrow shift right lowering auto-vectorization
Optimize the auto-vectorized code generated for the following function:
void foo (int16_t * __restrict a, int32_t * __restrict b, int32_t c, int n)
{
for (int i = 0; i < n; i++)
a[i] = b[i] >> c;
}
Before this patch:
foo:
ble a3,zero,.L5
.L3:
vsetvli a5,a3,e32,m1,ta,ma
vle32.v v1,0(a1)
vsetvli a4,zero,e32,m1,ta,ma
vsra.vx v1,v1,a2
vsetvli zero,zero,e16,mf2,ta,ma
slli a7,a5,2
vncvt.x.x.w v1,v1
slli a6,a5,1
vsetvli zero,a5,e16,mf2,ta,ma
sub a3,a3,a5
vse16.v v1,0(a0)
add a1,a1,a7
add a0,a0,a6
bne a3,zero,.L3
.L5:
ret
After this patch:
foo:
ble a3,zero,.L5
.L3:
vsetvli a5,a3,e32,m1,ta,ma
vle32.v v1,0(a1)
vsetvli a7,zero,e16,mf2,ta,ma
slli a6,a5,2
vnsra.wx v1,v1,a2
slli a4,a5,1
vsetvli zero,a5,e16,mf2,ta,ma
sub a3,a3,a5
vse16.v v1,0(a0)
add a1,a1,a6
add a0,a0,a4
bne a3,zero,.L3
.L5:
ret
gcc/ChangeLog:
* config/riscv/autovec-opt.md
(*v<any_shiftrt:optab><any_extend:optab>trunc<mode>): New pattern.
(*<any_shiftrt:optab>trunc<mode>): Ditto.
* config/riscv/autovec.md (<optab><mode>3): Change to
define_insn_and_split.
(v<optab><mode>3): Ditto.
(trunc<mode><v_double_trunc>2): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/binop/narrow-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c: New test.
8 files changed, 311 insertions, 14 deletions
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 7bb93ee..aef28e4 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -330,3 +330,49 @@ } [(set_attr "type" "viwmuladd") (set_attr "mode" "<V_DOUBLE_TRUNC>")]) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Binary narrow shifts. +;; ------------------------------------------------------------------------- +;; Includes: +;; - vnsrl.wv/vnsrl.wx/vnsrl.wi +;; - vnsra.wv/vnsra.wx/vnsra.wi +;; ------------------------------------------------------------------------- + +(define_insn_and_split "*v<any_shiftrt:optab><any_extend:optab>trunc<mode>" + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr,vr") + (truncate:<V_DOUBLE_TRUNC> + (any_shiftrt:VWEXTI + (match_operand:VWEXTI 1 "register_operand" " vr,vr") + (any_extend:VWEXTI + (match_operand:<V_DOUBLE_TRUNC> 2 "vector_shift_operand" " vr,vk")))))] + "TARGET_VECTOR" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + insn_code icode = code_for_pred_narrow (<any_shiftrt:CODE>, <MODE>mode); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands); + DONE; +} + [(set_attr "type" "vnshift") + (set_attr "mode" "<V_DOUBLE_TRUNC>")]) + +(define_insn_and_split "*<any_shiftrt:optab>trunc<mode>" + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr") + (truncate:<V_DOUBLE_TRUNC> + (any_shiftrt:VWEXTI + (match_operand:VWEXTI 1 "register_operand" " vr") + (match_operand:<VEL> 2 "csr_operand" " rK"))))] + "TARGET_VECTOR" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + operands[2] = gen_lowpart (Pmode, operands[2]); + insn_code icode = code_for_pred_narrow_scalar (<any_shiftrt:CODE>, <MODE>mode); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands); + DONE; +} + [(set_attr "type" "vnshift") + (set_attr "mode" "<V_DOUBLE_TRUNC>")]) diff --git a/gcc/config/riscv/autovec.md 
b/gcc/config/riscv/autovec.md index b707009..eadc2c5 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -150,18 +150,23 @@ ;; - vsll.vi/vsra.vi/vsrl.vi ;; ------------------------------------------------------------------------- -(define_expand "<optab><mode>3" - [(set (match_operand:VI 0 "register_operand") +(define_insn_and_split "<optab><mode>3" + [(set (match_operand:VI 0 "register_operand" "=vr") (any_shift:VI - (match_operand:VI 1 "register_operand") - (match_operand:<VEL> 2 "csr_operand")))] + (match_operand:VI 1 "register_operand" " vr") + (match_operand:<VEL> 2 "csr_operand" " rK")))] "TARGET_VECTOR" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] { operands[2] = gen_lowpart (Pmode, operands[2]); riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>, <MODE>mode), riscv_vector::RVV_BINOP, operands); DONE; -}) +} + [(set_attr "type" "vshift") + (set_attr "mode" "<MODE>")]) ;; ------------------------------------------------------------------------- ;; ---- [INT] Binary shifts by scalar. 
@@ -170,17 +175,22 @@ ;; - vsll.vv/vsra.vv/vsrl.vv ;; ------------------------------------------------------------------------- -(define_expand "v<optab><mode>3" - [(set (match_operand:VI 0 "register_operand") +(define_insn_and_split "v<optab><mode>3" + [(set (match_operand:VI 0 "register_operand" "=vr,vr") (any_shift:VI - (match_operand:VI 1 "register_operand") - (match_operand:VI 2 "vector_shift_operand")))] + (match_operand:VI 1 "register_operand" " vr,vr") + (match_operand:VI 2 "vector_shift_operand" " vr,vk")))] "TARGET_VECTOR" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] { riscv_vector::emit_vlmax_insn (code_for_pred (<CODE>, <MODE>mode), riscv_vector::RVV_BINOP, operands); DONE; -}) +} + [(set_attr "type" "vshift") + (set_attr "mode" "<MODE>")]) ;; ------------------------------------------------------------------------- ;; ---- [BOOL] Binary logical operations @@ -395,16 +405,21 @@ ;; ------------------------------------------------------------------------- ;; - vncvt.x.x.w ;; ------------------------------------------------------------------------- -(define_expand "trunc<mode><v_double_trunc>2" - [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") +(define_insn_and_split "trunc<mode><v_double_trunc>2" + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr") (truncate:<V_DOUBLE_TRUNC> - (match_operand:VWEXTI 1 "register_operand")))] + (match_operand:VWEXTI 1 "register_operand" " vr")))] "TARGET_VECTOR" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] { insn_code icode = code_for_pred_trunc (<MODE>mode); riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands); DONE; -}) +} + [(set_attr "type" "vshift") + (set_attr "mode" "<MODE>")]) ;; ------------------------------------------------------------------------- ;; Truncation to a mode whose inner mode size is a quarter of mode's. 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c new file mode 100644 index 0000000..3de8d85 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */ + +#include <stdint-gcc.h> + +#define TEST_TYPE(TYPE1, TYPE2) \ + __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \ + TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 *__restrict b, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = a[i] >> b[i]; \ + } + +#define TEST_ALL() \ + TEST_TYPE (int16_t, int8_t) \ + TEST_TYPE (int16_t, uint8_t) \ + TEST_TYPE (uint16_t, int8_t) \ + TEST_TYPE (uint16_t, uint8_t) \ + TEST_TYPE (int32_t, int16_t) \ + TEST_TYPE (int32_t, uint16_t) \ + TEST_TYPE (uint32_t, int16_t) \ + TEST_TYPE (uint32_t, uint16_t) \ + TEST_TYPE (int64_t, int32_t) \ + TEST_TYPE (int64_t, uint32_t) \ + TEST_TYPE (uint64_t, int32_t) \ + TEST_TYPE (uint64_t, uint32_t) + +TEST_ALL () + +/* { dg-final { scan-assembler-times {\tvnsra\.wv} 6 } } */ +/* { dg-final { scan-assembler-times {\tvnsrl\.wv} 5 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c new file mode 100644 index 0000000..e5c2e37 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */ + +#include <stdint-gcc.h> + +#define TEST_TYPE(TYPE1, TYPE2) \ + __attribute__ (( \ + noipa)) void vnshift_##TYPE1##_##TYPE2 (TYPE2 *__restrict dst, \ + TYPE1 *__restrict a, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = a[i] >> 7; \ + } + +#define TEST_ALL() \ + TEST_TYPE (int16_t, int8_t) \ + TEST_TYPE (int16_t, uint8_t) \ + TEST_TYPE 
(uint16_t, int8_t) \ + TEST_TYPE (uint16_t, uint8_t) \ + TEST_TYPE (int32_t, int16_t) \ + TEST_TYPE (int32_t, uint16_t) \ + TEST_TYPE (uint32_t, int16_t) \ + TEST_TYPE (uint32_t, uint16_t) \ + TEST_TYPE (int64_t, int32_t) \ + TEST_TYPE (int64_t, uint32_t) \ + TEST_TYPE (uint64_t, int32_t) \ + TEST_TYPE (uint64_t, uint32_t) + +TEST_ALL () + +/* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */ +/* { dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c new file mode 100644 index 0000000..3b28846 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */ + +#include <stdint-gcc.h> + +#define TEST_TYPE(TYPE1, TYPE2) \ + __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \ + TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 b, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = a[i] >> b; \ + } + +#define TEST_ALL() \ + TEST_TYPE (int16_t, int8_t) \ + TEST_TYPE (int16_t, uint8_t) \ + TEST_TYPE (uint16_t, int8_t) \ + TEST_TYPE (uint16_t, uint8_t) \ + TEST_TYPE (int32_t, int16_t) \ + TEST_TYPE (int32_t, uint16_t) \ + TEST_TYPE (uint32_t, int16_t) \ + TEST_TYPE (uint32_t, uint16_t) \ + TEST_TYPE (int64_t, int32_t) \ + TEST_TYPE (int64_t, uint32_t) \ + TEST_TYPE (uint64_t, int32_t) \ + TEST_TYPE (uint64_t, uint32_t) + +TEST_ALL () + +/* { dg-final { scan-assembler-times {\tvnsra\.wx} 4 } } */ +/* { dg-final { scan-assembler-times {\tvnsrl\.wx} 4 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c new file mode 100644 index 0000000..2a89810 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c @@ -0,0 +1,50 @@ +/* { dg-do run { 
target { riscv_vector } } } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */ + +#include <assert.h> +#include "narrow-1.c" + +#define RUN(TYPE1, TYPE2, SZ) \ + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \ + TYPE2 b##TYPE1##_##TYPE2##_##SZ[SZ]; \ + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \ + b##TYPE1##_##TYPE2##_##SZ[i] = i % (sizeof (TYPE2) * 3); \ + } \ + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \ + a##TYPE1##_##TYPE2##_##SZ, \ + b##TYPE1##_##TYPE2##_##SZ, SZ); \ + for (int i = 0; i < SZ; i++) \ + { \ + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \ + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] \ + >> b##TYPE1##_##TYPE2##_##SZ[i])); \ + } + +#define RUN_ALL(SZ) \ + RUN (int16_t, int8_t, SZ) \ + RUN (int16_t, uint8_t, SZ) \ + RUN (uint16_t, int8_t, SZ) \ + RUN (uint16_t, uint8_t, SZ) \ + RUN (int32_t, int16_t, SZ) \ + RUN (int32_t, uint16_t, SZ) \ + RUN (uint32_t, int16_t, SZ) \ + RUN (uint32_t, uint16_t, SZ) \ + RUN (int64_t, int32_t, SZ) \ + RUN (int64_t, uint32_t, SZ) \ + RUN (uint64_t, int32_t, SZ) \ + RUN (uint64_t, uint32_t, SZ) + +int +main () +{ + RUN_ALL (15) + RUN_ALL (16) + RUN_ALL (17) + RUN_ALL (127) + RUN_ALL (128) + RUN_ALL (129) + RUN_ALL (512) +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c new file mode 100644 index 0000000..1630ba1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c @@ -0,0 +1,46 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */ + +#include <assert.h> +#include "narrow-2.c" + +#define RUN(TYPE1, TYPE2, SZ) \ + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \ + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \ + } \ + vnshift_##TYPE1##_##TYPE2 
(dst##TYPE1##_##TYPE2##_##SZ, \ + a##TYPE1##_##TYPE2##_##SZ, SZ); \ + for (int i = 0; i < SZ; i++) \ + { \ + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \ + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 7)); \ + } + +#define RUN_ALL(SZ) \ + RUN (int16_t, int8_t, SZ) \ + RUN (int16_t, uint8_t, SZ) \ + RUN (uint16_t, int8_t, SZ) \ + RUN (uint16_t, uint8_t, SZ) \ + RUN (int32_t, int16_t, SZ) \ + RUN (int32_t, uint16_t, SZ) \ + RUN (uint32_t, int16_t, SZ) \ + RUN (uint32_t, uint16_t, SZ) \ + RUN (int64_t, int32_t, SZ) \ + RUN (int64_t, uint32_t, SZ) \ + RUN (uint64_t, int32_t, SZ) \ + RUN (uint64_t, uint32_t, SZ) + +int +main () +{ + RUN_ALL (15) + RUN_ALL (16) + RUN_ALL (17) + RUN_ALL (127) + RUN_ALL (128) + RUN_ALL (129) + RUN_ALL (512) +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c new file mode 100644 index 0000000..7638851 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c @@ -0,0 +1,46 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */ + +#include <assert.h> +#include "narrow-3.c" + +#define RUN(TYPE1, TYPE2, SZ) \ + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \ + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \ + } \ + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \ + a##TYPE1##_##TYPE2##_##SZ, 9, SZ); \ + for (int i = 0; i < SZ; i++) \ + { \ + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \ + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 9)); \ + } + +#define RUN_ALL(SZ) \ + RUN (int16_t, int8_t, SZ) \ + RUN (int16_t, uint8_t, SZ) \ + RUN (uint16_t, int8_t, SZ) \ + RUN (uint16_t, uint8_t, SZ) \ + RUN (int32_t, int16_t, SZ) \ + RUN (int32_t, uint16_t, SZ) \ + RUN (uint32_t, int16_t, SZ) \ + RUN (uint32_t, uint16_t, SZ) \ + RUN (int64_t, int32_t, SZ) \ + RUN (int64_t, uint32_t, SZ) \ + RUN 
(uint64_t, int32_t, SZ) \ + RUN (uint64_t, uint32_t, SZ) + +int +main () +{ + RUN_ALL (15) + RUN_ALL (16) + RUN_ALL (17) + RUN_ALL (127) + RUN_ALL (128) + RUN_ALL (129) + RUN_ALL (512) +} |