From f556cd8bd7929be8b73c66d55f98feac8c9ef1ee Mon Sep 17 00:00:00 2001 From: Ju-Zhe Zhong Date: Mon, 24 Oct 2022 10:08:53 +0800 Subject: RISC-V: Support load/store in mov pattern for RVV modes. gcc/ChangeLog: * config.gcc (riscv*): Add riscv-v.o to extra_objs. * config/riscv/constraints.md (vu): New constraint. (vi): Ditto. (Wc0): Ditto. (Wc1): Ditto. * config/riscv/predicates.md (vector_length_operand): New. (reg_or_mem_operand): Ditto. (vector_move_operand): Ditto. (vector_mask_operand): Ditto. (vector_merge_operand): Ditto. * config/riscv/riscv-protos.h (riscv_regmode_natural_size) New. (riscv_vector::const_vec_all_same_in_range_p): Ditto. (riscv_vector::legitimize_move): Ditto. (tail_policy): Ditto. (mask_policy): Ditto. * config/riscv/riscv-v.cc: New. * config/riscv/riscv-vector-builtins-bases.cc (vsetvl::expand): Refactor how LMUL encoding. * config/riscv/riscv.cc (riscv_print_operand): Update how LMUL print and mask operand print. (riscv_regmode_natural_size): New. * config/riscv/riscv.h (REGMODE_NATURAL_SIZE): New. * config/riscv/riscv.md (mode): Add vector modes. * config/riscv/t-riscv (riscv-v.o) New. * config/riscv/vector-iterators.md: New. * config/riscv/vector.md (vundefined): New. (mov): New. (*mov): New. (@vsetvl_no_side_effects): New. (@pred_mov): New. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/mov-1.c: New. * gcc.target/riscv/rvv/base/mov-10.c: New. * gcc.target/riscv/rvv/base/mov-11.c: New. * gcc.target/riscv/rvv/base/mov-12.c: New. * gcc.target/riscv/rvv/base/mov-13.c: New. * gcc.target/riscv/rvv/base/mov-2.c: New. * gcc.target/riscv/rvv/base/mov-3.c: New. * gcc.target/riscv/rvv/base/mov-4.c: New. * gcc.target/riscv/rvv/base/mov-5.c: New. * gcc.target/riscv/rvv/base/mov-6.c: New. * gcc.target/riscv/rvv/base/mov-7.c: New. * gcc.target/riscv/rvv/base/mov-8.c: New. * gcc.target/riscv/rvv/base/mov-9.c: New. 
--- gcc/config/riscv/vector.md | 279 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 273 insertions(+), 6 deletions(-) (limited to 'gcc/config/riscv/vector.md') diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 82ce902..451ed23 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -26,11 +26,72 @@ ;; - Auto-vectorization (TBD) ;; - Combine optimization (TBD) +(include "vector-iterators.md") + (define_c_enum "unspec" [ UNSPEC_VSETVL + UNSPEC_VUNDEF + UNSPEC_VPREDICATE ]) ;; ----------------------------------------------------------------- +;; ---- Miscellaneous Operations +;; ----------------------------------------------------------------- + +(define_insn "vundefined" + [(set (match_operand:V 0 "register_operand" "=vr") + (unspec:V [(const_int 0)] UNSPEC_VUNDEF))] + "TARGET_VECTOR" + "") + +;; ----------------------------------------------------------------- +;; ---- Moves Operations +;; ----------------------------------------------------------------- + +(define_expand "mov" + [(set (match_operand:V 0 "reg_or_mem_operand") + (match_operand:V 1 "vector_move_operand"))] + "TARGET_VECTOR" +{ + if (riscv_vector::legitimize_move (operands[0], operands[1], mode)) + DONE; +}) + +;; This pattern is used for code-gen for whole register load/stores. +;; Also applicable for all register moves. +;; Fractional vector modes load/store are not allowed to match this pattern. +;; Mask modes load/store are not allowed to match this pattern. 
+(define_insn "*mov" + [(set (match_operand:V 0 "reg_or_mem_operand" "=vr,m,vr") + (match_operand:V 1 "reg_or_mem_operand" "m,vr,vr"))] + "TARGET_VECTOR && ((register_operand (operands[0], mode) + && register_operand (operands[1], mode)) + || known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR))" + "@ + vl%m1re.v\t%0,%1 + vs%m1r.v\t%1,%0 + vmv%m1r.v\t%0,%1" + [(set_attr "type" "vldr,vstr,vmov") + (set_attr "mode" "")]) + +(define_expand "mov" + [(set (match_operand:VB 0 "reg_or_mem_operand") + (match_operand:VB 1 "vector_move_operand"))] + "TARGET_VECTOR" +{ + if (riscv_vector::legitimize_move (operands[0], operands[1], mode)) + DONE; +}) + +(define_insn "*mov" + [(set (match_operand:VB 0 "register_operand" "=vr") + (match_operand:VB 1 "register_operand" "vr"))] + "TARGET_VECTOR" + "vmv1r.v\t%0,%1" + [(set_attr "type" "vmov") + (set_attr "mode" "")]) + +;; ----------------------------------------------------------------- ;; ---- 6. Configuration-Setting Instructions ;; ----------------------------------------------------------------- ;; Includes: @@ -50,13 +111,98 @@ ;; operands[3]: LMUL ;; operands[4]: Tail policy 0 or 1 (undisturbed/agnostic) ;; operands[5]: Mask policy 0 or 1 (undisturbed/agnostic) + +;; We define 2 types of "vsetvl*" instruction patterns: + +;; - "@vsetvl" is a parallel format which has side effects. + +;; - "@vsetvl_no_side_effects" has no side effects. + +;; - "@vsetvl" is used by "vsetvl" intrinsics and "insert-vsetvl" PASS. + +;; - "@vsetvl_no_side_effects" is used by GCC standard patterns. + +;; - "@vsetvl" includes VL/VTYPE global registers status (define set) +;; and each RVV instruction includes VL/VTYPE global registers status (use) +;; so that we can guarantee each RVV instruction can execute with correct +;; VL/VTYPE global registers status after "insert-vsetvl" PASS. + +;; - "@vsetvl_no_side_effects" has no side effects and excludes VL/VTYPE +;; global registers status (define set). 
It's only used by GCC standard pattern +;; expansion. For example: "mov" pattern for fractional vector modes which +;; need to set VL/VTYPE. Then we could manually call this pattern to gain benefits +;; from the optimization of each GCC internal PASS. + +;; 1. void foo (float *in, float *out) +;; { +;; vfloat32mf2_t v = *(vfloat32mf2_t*)in; +;; *(vfloat32mf2_t*)out = v; +;; } +;; We could eliminate the second "vsetvl" by calling "@vsetvl_no_side_effects". +;; +;; "@vsetvl": ;; "@vsetvl_no_side_effects": +;; vsetvli a4,zero,e32,mf2,ta,ma ;; vsetvli a4,zero,e32,mf2,ta,ma +;; vle32.v v24,(a0) ;; vle32.v v24,(a0) +;; vsetvli a4,zero,e32,mf2,ta,ma ;; -- +;; vse32.v v24,(a1) ;; vse32.v v24,(a1) +;; ret ;; ret + +;; 2. void foo (int8_t *in, int8_t *out, int M) +;; { +;; for (int i = 0; i < M; i++){ +;; vint8mf2_t v = *(vint8mf2_t*)(in + i); +;; *(vint8mf2_t*)(out + i) = v; +;; } +;; } +;; +;; Hoist "vsetvl" instruction in LICM: +;; "@vsetvl": ;; "@vsetvl_no_side_effects": +;; - ;; vsetvli a4,zero,e32,mf2,ta,ma +;; LOOP: ;; LOOP: +;; vsetvli a4,zero,e32,mf2,ta,ma ;; - +;; vle32.v v24,(a0) ;; vle32.v v24,(a0) +;; vsetvli a4,zero,e32,mf2,ta,ma ;; - +;; vse32.v v24,(a1) ;; vse32.v v24,(a1) + +;; However, it may produce wrong codegen if we exclude VL/VTYPE in "vsetvl". +;; 3. void foo (int8_t *in, int8_t *out, int32_t *in2, int32_t *out2, int M) +;; { +;; for (int i = 0; i < M; i++){ +;; vint8mf2_t v = *(vint8mf2_t*)(in + i); +;; vint32mf2_t v2 = *(vint32mf2_t*)(in + i + i); +;; *(vint8mf2_t*)(out + i) = v; +;; *(vint32mf2_t*)(out + i + i) = v2; +;; } +;; } +;; +;; vsetvli a6,zero,e8,mf2,ta,ma +;; vsetvli a2,zero,e32,mf2,ta,ma +;; LOOP: +;; vle8.v v25,(a0) +;; vle32.v v24,(a5) +;; addi a0,a0,1 +;; vse8.v v25,(a1) +;; vse32.v v24,(a3) +;; +;; Both vle8.v and vle32.v are using the wrong VL/VTYPE status. +;; We leave it to "insert-vsetvl" PASS to correct this situation. + +;; The "insert-vsetvl" PASS mechanism: +;; 1. 
Before the "insert-vsetvl" PASS, only RVV instructions generated +;; by GCC standard pattern expansion have the corresponding "vsetvl". +;; We exploit each GCC internal optimization pass to optimize the "vsetvl". +;; 2. Correct the VL/VTYPE status for each GCC standard pattern RVV instruction. +;; Insert vsetvl for each RVV instruction that has no VL/VTYPE status if necessary. +;; For example: RVV intrinsics. +;; 3. Optimize "vsetvl" instructions. + (define_insn "@vsetvl" - [(set (match_operand:P 0 "register_operand" "=r,r") - (unspec:P [(match_operand:P 1 "csr_operand" "r,K") - (match_operand 2 "const_int_operand" "i,i") - (match_operand 3 "const_int_operand" "i,i") - (match_operand 4 "const_int_operand" "i,i") - (match_operand 5 "const_int_operand" "i,i")] UNSPEC_VSETVL)) + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P [(match_operand:P 1 "csr_operand" "rK") + (match_operand 2 "const_int_operand" "i") + (match_operand 3 "const_int_operand" "i") + (match_operand 4 "const_int_operand" "i") + (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL)) (set (reg:SI VL_REGNUM) (unspec:SI [(match_dup 1) (match_dup 2) @@ -70,3 +216,124 @@ "vset%i1vli\t%0,%1,e%2,%m3,t%p4,m%p5" [(set_attr "type" "vsetvl") (set_attr "mode" "")]) + +;; We keep it as no side effects before reload_completed. +;; In this case, we can gain benefits from different GCC +;; internal PASS such as cprop, fwprop, combine,...etc. + +;; Then recover it for "insert-vsetvl" and "sched2" PASS +;; in order to get correct codegen. 
+(define_insn_and_split "@vsetvl_no_side_effects" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P [(match_operand:P 1 "csr_operand" "rK") + (match_operand 2 "const_int_operand" "i") + (match_operand 3 "const_int_operand" "i") + (match_operand 4 "const_int_operand" "i") + (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL))] + "TARGET_VECTOR" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (unspec:P [(match_dup 1) (match_dup 2) (match_dup 3) + (match_dup 4) (match_dup 5)] UNSPEC_VSETVL)) + (set (reg:SI VL_REGNUM) + (unspec:SI [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_VSETVL)) + (set (reg:SI VTYPE_REGNUM) + (unspec:SI [(match_dup 2) (match_dup 3) (match_dup 4) + (match_dup 5)] UNSPEC_VSETVL))])] + "" + [(set_attr "type" "vsetvl") + (set_attr "mode" "")]) + +;; RVV machine description matching format +;; (define_insn "" +;; [(set (match_operand:MODE 0) +;; (if_then_else:MODE +;; (unspec: +;; [(match_operand: 1 "vector_mask_operand") +;; (match_operand N + 4 "vector_length_operand") +;; (match_operand N + 5 "const_int_operand") +;; (match_operand N + 6 "const_int_operand") +;; (reg:SI VL_REGNUM) +;; (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) +;; (instruction operation:MODE +;; (match_operand 3 +;; (match_operand 4 +;; (match_operand 5 +;; ................ +;; (match_operand N + 3) +;; (match_operand:MODE 2 "vector_reg_or_const0_operand")))] +;; +;; (unspec:[........] UNSPEC_VPREDICATE) is a predicate wrapper. +;; Include mask predicate && length predicate && vector policy. + +;; ------------------------------------------------------------------------------- +;; ---- Predicated Mov +;; ------------------------------------------------------------------------------- +;; Includes: +;; - 7.4. 
Vector Unit-Stride Instructions +;; - 11.16 Vector Integer Move Instructions +;; - 13.16 Vector Floating-Point Move Instruction +;; - 15.1 Vector Mask-Register Logical Instructions +;; ------------------------------------------------------------------------------- + +;; vle.v/vse.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f. +;; For vle.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f, we may need merge and mask operand. +;; For vse.v, we don't need merge operand, so it should always match "vu". +;; constraint alternative 0 ~ 1 match vle.v. +;; constraint alternative 2 match vse.v. +;; constraint alternative 3 match vmv.v.v. +;; constraint alternative 4 match vmv.v.i. +;; For vmv.v.i, we allow 2 following cases: +;; 1. (const_vector:VNx1QI repeat [ +;; (const_int:QI N)]), -15 <= N < 16. +;; 2. (const_vector:VNx1SF repeat [ +;; (const_double:SF 0.0 [0x0.0p+0])]). +(define_insn "@pred_mov" + [(set (match_operand:V 0 "nonimmediate_operand" "=vd, vr, m, vr, vr") + (if_then_else:V + (unspec: + [(match_operand: 1 "vector_mask_operand" " vm, Wc1, vmWc1, vmWc1, Wc1") + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK") + (match_operand 5 "const_int_operand" " i, i, i, i, i") + (match_operand 6 "const_int_operand" " i, i, i, i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operand:V 3 "vector_move_operand" " m, m, vr, vr, viWc0") + (match_operand:V 2 "vector_merge_operand" " 0, vu, 0, vu0, vu0")))] + "TARGET_VECTOR" + "@ + vle.v\t%0,%3%p1 + vle.v\t%0,%3%p1 + vse.v\t%3,%0%p1 + vmv.v.v\t%0,%3 + vmv.v.i\t%0,v%3" + [(set_attr "type" "vlde,vlde,vste,vimov,vimov") + (set_attr "mode" "")]) + +;; vlm.v/vsm.v/vmclr.m/vmset.m. +;; constraint alternative 0 match vlm.v. +;; constraint alternative 1 match vsm.v. +;; constraint alternative 2 match vmclr.m. +;; constraint alternative 3 match vmset.m. 
+(define_insn "@pred_mov" + [(set (match_operand:VB 0 "nonimmediate_operand" "=vr, m, vr, vr") + (if_then_else:VB + (unspec:VB + [(match_operand:VB 1 "vector_mask_operand" "Wc1, Wc1, Wc1, Wc1") + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK") + (match_operand 5 "const_int_operand" " i, i, i, i") + (match_operand 6 "const_int_operand" " i, i, i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operand:VB 3 "vector_move_operand" " m, vr, Wc0, Wc1") + (match_operand:VB 2 "vector_merge_operand" " vu, 0, vu, vu")))] + "TARGET_VECTOR" + "@ + vlm.v\t%0,%3 + vsm.v\t%3,%0 + vmclr.m\t%0 + vmset.m\t%0" + [(set_attr "type" "vldm,vstm,vmalu,vmalu") + (set_attr "mode" "")]) -- cgit v1.1