diff options
10 files changed, 321 insertions, 27 deletions
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 22ab8af..df51684 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1196,6 +1196,88 @@ } [(set_attr "type" "vfwmul")]) +;; Combine extend + vredsum to vwredsum[u] +(define_insn_and_split "*reduc_plus_scal_<mode>" + [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand") + (unspec:<V_DOUBLE_EXTEND_VEL> [ + (any_extend:<V_DOUBLE_EXTEND> + (match_operand:VI_QHS_NO_M8 1 "register_operand")) + ] UNSPEC_REDUC_SUM))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + riscv_vector::expand_reduction (<WREDUC_UNSPEC>, operands, + CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); + DONE; +} +[(set_attr "type" "vector")]) + +;; Combine extend + vfredusum to vfwredusum +(define_insn_and_split "*reduc_plus_scal_<mode>" + [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand") + (unspec:<V_DOUBLE_EXTEND_VEL> [ + (float_extend:<V_DOUBLE_EXTEND> + (match_operand:VF_HS_NO_M8 1 "register_operand")) + ] UNSPEC_REDUC_SUM_UNORDERED))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED, operands, + CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); + DONE; +} +[(set_attr "type" "vector")]) + +;; Combine extend + vfredosum to vfwredosum +(define_insn_and_split "*fold_left_widen_plus_<mode>" + [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand") + (unspec:<V_DOUBLE_EXTEND_VEL> [ + (float_extend:<V_DOUBLE_EXTEND> + (match_operand:VF_HS_NO_M8 2 "register_operand")) + (match_operand:<V_DOUBLE_EXTEND_VEL> 1 "register_operand") + ] UNSPEC_REDUC_SUM_ORDERED))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED, operands, + operands[1], + riscv_vector::reduction_type::FOLD_LEFT); + DONE; +} +[(set_attr "type" "vector")]) + +;; Combine extend + mask vfredosum to mask vfwredosum +(define_insn_and_split "*mask_len_fold_left_widen_plus_<mode>" + [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand") + (unspec:<V_DOUBLE_EXTEND_VEL> [ + (float_extend:<V_DOUBLE_EXTEND> + (match_operand:VF_HS_NO_M8 2 "register_operand")) + (match_operand:<V_DOUBLE_EXTEND_VEL> 1 "register_operand") + (match_operand:<VM> 3 "vector_mask_operand") + (match_operand 4 "autovec_length_operand") + (match_operand 5 "const_0_operand") + ] UNSPEC_REDUC_SUM_ORDERED))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], const0_rtx)) + emit_move_insn (operands[0], operands[1]); + else + riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED, operands, + operands[1], + riscv_vector::reduction_type::MASK_LEN_FOLD_LEFT); + DONE; +} +[(set_attr "type" "vector")]) + ;; ============================================================================= ;; Misc combine patterns ;; ============================================================================= diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 9e05afd..8537b9d 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2086,14 +2086,20 @@ ;; - vredxor.vs ;; ------------------------------------------------------------------------- -(define_expand "reduc_plus_scal_<mode>" - [(match_operand:<VEL> 0 "register_operand") - (match_operand:VI 1 "register_operand")] - "TARGET_VECTOR" +(define_insn_and_split "reduc_plus_scal_<mode>" + [(set (match_operand:<VEL> 0 "register_operand") + (unspec:<VEL> [ + (match_operand:VI 1 "register_operand") + ] UNSPEC_REDUC_SUM))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] { riscv_vector::expand_reduction (UNSPEC_REDUC_SUM, operands, CONST0_RTX (<VEL>mode)); DONE; -}) +} +[(set_attr "type" "vector")]) (define_expand "reduc_smax_scal_<mode>" [(match_operand:<VEL> 0 "register_operand") @@ -2173,15 +2179,21 @@ ;; - vfredmin.vs ;; ------------------------------------------------------------------------- -(define_expand "reduc_plus_scal_<mode>" - [(match_operand:<VEL> 0 "register_operand") - (match_operand:VF 1 "register_operand")] - "TARGET_VECTOR" +(define_insn_and_split "reduc_plus_scal_<mode>" + [(set (match_operand:<VEL> 0 "register_operand") + (unspec:<VEL> [ + (match_operand:VF 1 "register_operand") + ] UNSPEC_REDUC_SUM_UNORDERED))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] { riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_UNORDERED, operands, CONST0_RTX (<VEL>mode)); DONE; -}) +} +[(set_attr "type" "vector")]) (define_expand "reduc_smax_scal_<mode>" [(match_operand:<VEL> 0 "register_operand") @@ -2215,27 +2227,38 @@ ;; ------------------------------------------------------------------------- ;; Unpredicated in-order FP reductions. -(define_expand "fold_left_plus_<mode>" - [(match_operand:<VEL> 0 "register_operand") - (match_operand:<VEL> 1 "register_operand") - (match_operand:VF 2 "register_operand")] - "TARGET_VECTOR" +(define_insn_and_split "fold_left_plus_<mode>" + [(set (match_operand:<VEL> 0 "register_operand") + (unspec:<VEL> [ + (match_operand:VF 2 "register_operand") + (match_operand:<VEL> 1 "register_operand") + ] UNSPEC_REDUC_SUM_ORDERED))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] { riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_ORDERED, operands, operands[1], riscv_vector::reduction_type::FOLD_LEFT); DONE; -}) +} +[(set_attr "type" "vector")]) ;; Predicated in-order FP reductions. -(define_expand "mask_len_fold_left_plus_<mode>" - [(match_operand:<VEL> 0 "register_operand") - (match_operand:<VEL> 1 "register_operand") - (match_operand:VF 2 "register_operand") - (match_operand:<VM> 3 "vector_mask_operand") - (match_operand 4 "autovec_length_operand") - (match_operand 5 "const_0_operand")] - "TARGET_VECTOR" +(define_insn_and_split "mask_len_fold_left_plus_<mode>" + [(set (match_operand:<VEL> 0 "register_operand") + (unspec:<VEL> [ + (match_operand:VF 2 "register_operand") + (match_operand:<VEL> 1 "register_operand") + (match_operand:<VM> 3 "vector_mask_operand") + (match_operand 4 "autovec_length_operand") + (match_operand 5 "const_0_operand") + ] UNSPEC_REDUC_SUM_ORDERED))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] { if (rtx_equal_p (operands[4], const0_rtx)) emit_move_insn (operands[0], operands[1]); @@ -2244,7 +2267,8 @@ operands[1], riscv_vector::reduction_type::MASK_LEN_FOLD_LEFT); DONE; -}) +} +[(set_attr "type" "vector")]) ;; ------------------------------------------------------------------------- ;; ---- [INT,FP] Extract active element diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 1bf5471..668594b 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3212,7 +3212,8 @@ expand_reduction (unsigned unspec, rtx *ops, rtx init, reduction_type type) { rtx vector = type == reduction_type::UNORDERED ? ops[1] : ops[2]; machine_mode vmode = GET_MODE (vector); - machine_mode m1_mode = get_m1_mode (vmode).require (); + machine_mode vel_mode = GET_MODE (ops[0]); + machine_mode m1_mode = get_m1_mode (vel_mode).require (); rtx m1_tmp = gen_reg_rtx (m1_mode); rtx scalar_move_ops[] = {m1_tmp, init}; @@ -3225,7 +3226,9 @@ expand_reduction (unsigned unspec, rtx *ops, rtx init, reduction_type type) rtx reduc_ops[] = {m1_tmp2, vector, m1_tmp}; if (unspec == UNSPEC_REDUC_SUM_ORDERED - || unspec == UNSPEC_REDUC_SUM_UNORDERED) + || unspec == UNSPEC_WREDUC_SUM_ORDERED + || unspec == UNSPEC_REDUC_SUM_UNORDERED + || unspec == UNSPEC_WREDUC_SUM_UNORDERED) { insn_code icode = code_for_pred (unspec, vmode); if (type == reduction_type::MASK_LEN_FOLD_LEFT) diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index c9d0a50..fa9892c 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -686,6 +686,14 @@ RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") ]) +(define_mode_iterator VI_QHS_NO_M8 [ + RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + + RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + + RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") +]) + (define_mode_iterator VF_HS [ (RVVM8HF "TARGET_ZVFH") (RVVM4HF "TARGET_ZVFH") (RVVM2HF "TARGET_ZVFH") (RVVM1HF "TARGET_ZVFH") (RVVMF2HF "TARGET_ZVFH") @@ -695,6 +703,23 @@ (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") ]) +(define_mode_iterator VF_HS_NO_M8 [ + (RVVM4HF "TARGET_ZVFH") + (RVVM2HF "TARGET_ZVFH") + (RVVM1HF "TARGET_ZVFH") + (RVVMF2HF "TARGET_ZVFH") + (RVVMF4HF "TARGET_ZVFH && TARGET_MIN_VLEN > 32") + (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") + (RVVM2SF "TARGET_VECTOR_ELEN_FP_32") + (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") + (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") +]) + +(define_mode_iterator VF_HS_M8 [ + (RVVM8HF "TARGET_ZVFH") + (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") +]) + (define_mode_iterator V_VLSI_QHS [ RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") @@ -1319,6 +1344,8 @@ (UNSPEC_WREDUC_SUM_ORDERED "wredosum") (UNSPEC_WREDUC_SUM_UNORDERED "wredusum") ]) +(define_code_attr WREDUC_UNSPEC [(zero_extend "UNSPEC_WREDUC_SUMU") (sign_extend "UNSPEC_WREDUC_SUM")]) + (define_mode_attr VINDEX [ (RVVM8QI "RVVM8QI") (RVVM4QI "RVVM4QI") (RVVM2QI "RVVM2QI") (RVVM1QI "RVVM1QI") (RVVMF2QI "RVVMF2QI") (RVVMF4QI "RVVMF4QI") (RVVMF8QI "RVVMF8QI") @@ -1743,6 +1770,18 @@ (V1DF "DF") (V2DF "DF") (V4DF "DF") (V8DF "DF") (V16DF "DF") (V32DF "DF") (V64DF "DF") (V128DF "DF") (V256DF "DF") (V512DF "DF") ]) +(define_mode_attr V_DOUBLE_EXTEND_VEL [ + (RVVM4QI "HI") (RVVM2QI "HI") (RVVM1QI "HI") (RVVMF2QI "HI") (RVVMF4QI "HI") (RVVMF8QI "HI") + + (RVVM4HI "SI") (RVVM2HI "SI") (RVVM1HI "SI") (RVVMF2HI "SI") (RVVMF4HI "SI") + + (RVVM4SI "DI") (RVVM2SI "DI") (RVVM1SI "DI") (RVVMF2SI "DI") + + (RVVM4HF "SF") (RVVM2HF "SF") (RVVM1HF "SF") (RVVMF2HF "SF") (RVVMF4HF "SF") + + (RVVM4SF "DF") (RVVM2SF "DF") (RVVM1SF "DF") (RVVMF2SF "DF") +]) + (define_mode_attr vel [ (RVVM8QI "qi") (RVVM4QI "qi") (RVVM2QI "qi") (RVVM1QI "qi") (RVVMF2QI "qi") (RVVMF4QI "qi") (RVVMF8QI "qi") @@ -2101,6 +2140,18 @@ (RVVM1QI "64") (RVVMF2QI "64") (RVVMF4QI "64") (RVVMF8QI "64") ]) +(define_mode_attr V_DOUBLE_EXTEND [ + (RVVM4QI "RVVM8HI") (RVVM2QI "RVVM4HI") (RVVM1QI "RVVM2HI") (RVVMF2QI "RVVM1HI") (RVVMF4QI "RVVMF2HI") (RVVMF8QI "RVVMF4HI") + + (RVVM4HI "RVVM8SI") (RVVM2HI "RVVM4SI") (RVVM1HI "RVVM2SI") (RVVMF2HI "RVVM1SI") (RVVMF4HI "RVVMF2SI") + + (RVVM4SI "RVVM8DI") (RVVM2SI "RVVM4DI") (RVVM1SI "RVVM2DI") (RVVMF2SI "RVVM1DI") + + (RVVM4HF "RVVM8SF") (RVVM2HF "RVVM4SF") (RVVM1HF "RVVM2SF") (RVVMF2HF "RVVM1SF") (RVVMF4HF "RVVMF2SF") + + (RVVM4SF "RVVM8DF") (RVVM2SF "RVVM4DF") (RVVM1SF "RVVM2DF") (RVVMF2SF "RVVM1DF") +]) + (define_mode_attr V_DOUBLE_TRUNC [ (RVVM8HI "RVVM4QI") (RVVM4HI "RVVM2QI") (RVVM2HI "RVVM1QI") (RVVM1HI "RVVMF2QI") (RVVMF2HI "RVVMF4QI") (RVVMF4HI "RVVMF8QI") diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc-1.c new file mode 100644 index 0000000..8075646 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc-1.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */ +#include <stdint-gcc.h> + +#define TEST_TYPE(TYPE1, TYPE2, N) \ + __attribute__((noipa)) TYPE1 reduc_##TYPE1##_##TYPE2(TYPE2 *restrict a) { \ + TYPE1 sum = 0; \ + for (int i = 0; i < N; i += 1) \ + sum += a[i]; \ + return sum; \ + } + +#define TEST_ALL(TEST) \ + TEST(int16_t, int8_t, 16) \ + TEST(int32_t, int16_t, 8) \ + TEST(int64_t, int32_t, 4) \ + TEST(uint16_t, uint8_t, 16) \ + TEST(uint32_t, uint16_t, 8) \ + TEST(uint64_t, uint32_t, 4) \ + TEST(float, _Float16, 8) \ + TEST(double, float, 4) + +TEST_ALL(TEST_TYPE) + +/* { dg-final { scan-assembler-times {\tvfwredusum\.vs} 2 } } */ +/* { dg-final { scan-assembler-times {\tvwredsum\.vs} 3 } } */ +/* { dg-final { scan-assembler-times {\tvwredsumu\.vs} 3 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-1.c new file mode 100644 index 0000000..7ae5080 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-1.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d --param riscv-autovec-preference=scalable -fno-vect-cost-model" } */ +#include <stdint-gcc.h> + +#define TEST_TYPE(TYPE1, TYPE2) \ + __attribute__((noipa)) \ + TYPE1 reduc_##TYPE1##_##TYPE2(TYPE2 *restrict a, int n) { \ + TYPE1 sum = 0; \ + for (int i = 0; i < n; i += 1) \ + sum += a[i]; \ + return sum; \ + } + +#define TEST_ALL(TEST) \ + TEST(float, _Float16) \ + TEST(double, float) + +TEST_ALL(TEST_TYPE) + +/* { dg-final { scan-assembler-times {\tvfwredosum\.vs} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-2.c new file mode 100644 index 0000000..a922aa7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-2.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m2 -fno-vect-cost-model" } */ +#include <stdint-gcc.h> + +#define TEST_TYPE(TYPE1, TYPE2, N) \ + __attribute__((noipa)) TYPE1 reduc_##TYPE1##_##TYPE2(TYPE2 *restrict a) { \ + TYPE1 sum = 0; \ + for (int i = 0; i < N; i += 1) \ + sum += a[i]; \ + return sum; \ + } + +#define TEST_ALL(TEST) \ + TEST(float, _Float16, 8) \ + TEST(double, float, 4) + +TEST_ALL(TEST_TYPE) + +/* { dg-final { scan-assembler-times {\tvfwredosum\.vs} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-1.c new file mode 100644 index 0000000..d4ba4f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-1.c @@ -0,0 +1,24 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */ + +#include "widen_reduc_order-1.c" + +#define N 99 + +#define RUN(TYPE1, TYPE2) \ + { \ + TYPE2 a[N]; \ + TYPE1 r = 0; \ + for (int i = 0; i < N; i++) { \ + a[i] = (i * 0.1) * (i & 1 ? 1 : -1); \ + r += a[i]; \ + asm volatile("" ::: "memory"); \ + } \ + if (r != reduc_##TYPE1##_##TYPE2(a, N)) \ + __builtin_abort(); \ + } + +int __attribute__((optimize(1))) main() { + TEST_ALL(RUN) + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-2.c new file mode 100644 index 0000000..6ac6762 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-2.c @@ -0,0 +1,22 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */ + +#include "widen_reduc_order-2.c" + +#define RUN(TYPE1, TYPE2, N) \ + { \ + TYPE2 a[N]; \ + TYPE1 r = 0; \ + for (int i = 0; i < N; i++) { \ + a[i] = (i * 0.1) * (i & 1 ? 1 : -1); \ + r += a[i]; \ + asm volatile("" ::: "memory"); \ + } \ + if (r != reduc_##TYPE1##_##TYPE2(a)) \ + __builtin_abort(); \ + } + +int __attribute__((optimize(1))) main() { + TEST_ALL(RUN) + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_run-1.c new file mode 100644 index 0000000..d70a652 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_reduc_run-1.c @@ -0,0 +1,22 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */ + +#include "widen_reduc-1.c" + +#define RUN(TYPE1, TYPE2, N) \ + { \ + TYPE2 a[N]; \ + TYPE1 r = 0; \ + for (int i = 0; i < N; i++) { \ + a[i] = (i * 0.1) * (i & 1 ? 1 : -1); \ + r += a[i]; \ + asm volatile("" ::: "memory"); \ + } \ + if (r != reduc_##TYPE1##_##TYPE2(a)) \ + __builtin_abort(); \ + } + +int __attribute__((optimize(1))) main() { + TEST_ALL(RUN) + return 0; +} |