aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/riscv/vector.md
diff options
context:
space:
mode:
authorJu-Zhe Zhong <juzhe.zhong@rivai.ai>2022-10-24 10:08:53 +0800
committerKito Cheng <kito.cheng@sifive.com>2022-10-26 17:01:36 +0800
commitf556cd8bd7929be8b73c66d55f98feac8c9ef1ee (patch)
treeb2953766e61d3674ec7dc730d3b0dd94aae9158f /gcc/config/riscv/vector.md
parent86654b2cc167b540f4f144549b80748ce0054729 (diff)
downloadgcc-f556cd8bd7929be8b73c66d55f98feac8c9ef1ee.zip
gcc-f556cd8bd7929be8b73c66d55f98feac8c9ef1ee.tar.gz
gcc-f556cd8bd7929be8b73c66d55f98feac8c9ef1ee.tar.bz2
RISC-V: Support load/store in mov<mode> pattern for RVV modes.
gcc/ChangeLog: * config.gcc (riscv*): Add riscv-v.o to extra_objs. * config/riscv/constraints.md (vu): New constraint. (vi): Ditto. (Wc0): Ditto. (Wc1): Ditto. * config/riscv/predicates.md (vector_length_operand): New. (reg_or_mem_operand): Ditto. (vector_move_operand): Ditto. (vector_mask_operand): Ditto. (vector_merge_operand): Ditto. * config/riscv/riscv-protos.h (riscv_regmode_natural_size) New. (riscv_vector::const_vec_all_same_in_range_p): Ditto. (riscv_vector::legitimize_move): Ditto. (tail_policy): Ditto. (mask_policy): Ditto. * config/riscv/riscv-v.cc: New. * config/riscv/riscv-vector-builtins-bases.cc (vsetvl::expand): Refactor how LMUL encoding. * config/riscv/riscv.cc (riscv_print_operand): Update how LMUL print and mask operand print. (riscv_regmode_natural_size): New. * config/riscv/riscv.h (REGMODE_NATURAL_SIZE): New. * config/riscv/riscv.md (mode): Add vector modes. * config/riscv/t-riscv (riscv-v.o) New. * config/riscv/vector-iterators.md: New. * config/riscv/vector.md (vundefined<mode>): New. (mov<mode>): New. (*mov<mode>): New. (@vsetvl<mode>_no_side_effects): New. (@pred_mov<mode>): New. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/mov-1.c: New. * gcc.target/riscv/rvv/base/mov-10.c: New. * gcc.target/riscv/rvv/base/mov-11.c: New. * gcc.target/riscv/rvv/base/mov-12.c: New. * gcc.target/riscv/rvv/base/mov-13.c: New. * gcc.target/riscv/rvv/base/mov-2.c: New. * gcc.target/riscv/rvv/base/mov-3.c: New. * gcc.target/riscv/rvv/base/mov-4.c: New. * gcc.target/riscv/rvv/base/mov-5.c: New. * gcc.target/riscv/rvv/base/mov-6.c: New. * gcc.target/riscv/rvv/base/mov-7.c: New. * gcc.target/riscv/rvv/base/mov-8.c: New. * gcc.target/riscv/rvv/base/mov-9.c: New.
Diffstat (limited to 'gcc/config/riscv/vector.md')
-rw-r--r--gcc/config/riscv/vector.md279
1 files changed, 273 insertions, 6 deletions
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 82ce902..451ed23 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -26,11 +26,72 @@
;; - Auto-vectorization (TBD)
;; - Combine optimization (TBD)
+(include "vector-iterators.md")
+
(define_c_enum "unspec" [
UNSPEC_VSETVL
+ UNSPEC_VUNDEF
+ UNSPEC_VPREDICATE
])
;; -----------------------------------------------------------------
+;; ---- Miscellaneous Operations
+;; -----------------------------------------------------------------
+
+(define_insn "vundefined<mode>"
+ [(set (match_operand:V 0 "register_operand" "=vr")
+ (unspec:V [(const_int 0)] UNSPEC_VUNDEF))]
+ "TARGET_VECTOR"
+ "")
+
+;; -----------------------------------------------------------------
+;; ---- Moves Operations
+;; -----------------------------------------------------------------
+
+(define_expand "mov<mode>"
+ [(set (match_operand:V 0 "reg_or_mem_operand")
+ (match_operand:V 1 "vector_move_operand"))]
+ "TARGET_VECTOR"
+{
+ if (riscv_vector::legitimize_move (operands[0], operands[1], <VM>mode))
+ DONE;
+})
+
+;; This pattern is used for code-gen for whole register load/stores.
+;; Also applicable for all register moves.
+;; Fractional vector modes load/store are not allowed to match this pattern.
+;; Mask modes load/store are not allowed to match this pattern.
+(define_insn "*mov<mode>"
+ [(set (match_operand:V 0 "reg_or_mem_operand" "=vr,m,vr")
+ (match_operand:V 1 "reg_or_mem_operand" "m,vr,vr"))]
+ "TARGET_VECTOR && ((register_operand (operands[0], <MODE>mode)
+ && register_operand (operands[1], <MODE>mode))
+ || known_ge (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR))"
+ "@
+ vl%m1re<sew>.v\t%0,%1
+ vs%m1r.v\t%1,%0
+ vmv%m1r.v\t%0,%1"
+ [(set_attr "type" "vldr,vstr,vmov")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "mov<mode>"
+ [(set (match_operand:VB 0 "reg_or_mem_operand")
+ (match_operand:VB 1 "vector_move_operand"))]
+ "TARGET_VECTOR"
+{
+ if (riscv_vector::legitimize_move (operands[0], operands[1], <MODE>mode))
+ DONE;
+})
+
+(define_insn "*mov<mode>"
+ [(set (match_operand:VB 0 "register_operand" "=vr")
+ (match_operand:VB 1 "register_operand" "vr"))]
+ "TARGET_VECTOR"
+ "vmv1r.v\t%0,%1"
+ [(set_attr "type" "vmov")
+ (set_attr "mode" "<MODE>")])
+
+;; -----------------------------------------------------------------
;; ---- 6. Configuration-Setting Instructions
;; -----------------------------------------------------------------
;; Includes:
@@ -50,13 +111,98 @@
;; operands[3]: LMUL
;; operands[4]: Tail policy 0 or 1 (undisturbed/agnostic)
;; operands[5]: Mask policy 0 or 1 (undisturbed/agnostic)
+
+;; We define 2 types of "vsetvl*" instruction patterns:
+
+;; - "@vsetvl<mode>" is a parallel format which has side effects.
+
+;; - "@vsetvl<mode>_no_side_effects" has no side effects.
+
+;; - "@vsetvl<mode>" is used by "vsetvl" intrinsics and "insert-vsetvl" PASS.
+
+;; - "@vsetvl<mode>_no_side_effects" is used by GCC standard patterns.
+
+;; - "@vsetvl<mode>" includes VL/VTYPE global registers status (define set)
+;; and each RVV instruction includes VL/VTYPE global registers status (use)
+;; so that we can guarantee each RVV instruction can execute with correct
+;; VL/VTYPE global registers status after "insert-vsetvl" PASS.
+
+;; - "@vsetvl<mode>_no_side_effects" has no side effects and excludes VL/VTYPE
+;; global registers status (define set). It's only used by GCC standard pattern
+;; expansion. For example: "mov<mode>" pattern for fractional vector modes which
+;; need to set VL/VTYPE. Then we could manually call this pattern to gain benefits
+;; from the optimization of each GCC internal PASS.
+
+;; 1. void foo (float *in, float *out)
+;; {
+;; vfloat32mf2_t v = *(vfloat32mf2_t*)in;
+;; *(vfloat32mf2_t*)out = v;
+;; }
+;; We could eliminate the second "vsetvl" by calling "@vsetvl<mode>_no_side_effects".
+;;
+;; "@vsetvl<mode>": ;; "@vsetvl<mode>_no_side_effects":
+;; vsetvli a4,zero,e32,mf2,ta,ma ;; vsetvli a4,zero,e32,mf2,ta,ma
+;; vle32.v v24,(a0) ;; vle32.v v24,(a0)
+;; vsetvli a4,zero,e32,mf2,ta,ma ;; --
+;; vse32.v v24,(a1) ;; vse32.v v24,(a1)
+;; ret ;; ret
+
+;; 2. void foo (int8_t *in, int8_t *out, int M)
+;; {
+;; for (int i = 0; i < M; i++){
+;; vint8mf2_t v = *(vint8mf2_t*)(in + i);
+;; *(vint8mf2_t*)(out + i) = v;
+;; }
+;; }
+;;
+;; Hoist "vsetvl" instruction in LICM:
+;; "@vsetvl<mode>": ;; "@vsetvl<mode>_no_side_effects":
+;; - ;; vsetvli a4,zero,e8,mf2,ta,ma
+;; LOOP: ;; LOOP:
+;; vsetvli a4,zero,e8,mf2,ta,ma ;; -
+;; vle8.v v24,(a0) ;; vle8.v v24,(a0)
+;; vsetvli a4,zero,e8,mf2,ta,ma ;; -
+;; vse8.v v24,(a1) ;; vse8.v v24,(a1)
+
+;; However, it may produce wrong codegen if we exclude VL/VTYPE in "vsetvl<mode>".
+;; 3. void foo (int8_t *in, int8_t *out, int32_t *in2, int32_t *out2, int M)
+;; {
+;; for (int i = 0; i < M; i++){
+;; vint8mf2_t v = *(vint8mf2_t*)(in + i);
+;; vint32mf2_t v2 = *(vint32mf2_t*)(in2 + i);
+;; *(vint8mf2_t*)(out + i) = v;
+;; *(vint32mf2_t*)(out2 + i) = v2;
+;; }
+;; }
+;;
+;; vsetvli a6,zero,e8,mf2,ta,ma
+;; vsetvli a2,zero,e32,mf2,ta,ma
+;; LOOP:
+;; vle8.v v25,(a0)
+;; vle32.v v24,(a5)
+;; addi a0,a0,1
+;; vse8.v v25,(a1)
+;; vse32.v v24,(a3)
+;;
+;; Both vle8.v and vle32.v are using the wrong VL/VTYPE status.
+;; We leave it to "insert-vsetvl" PASS to correct this situation.
+
+;; The "insert-vsetvl" PASS mechanism:
+;; 1. Before "insert-vsetvl" PASS, only the RVV instructions generated
+;; by GCC standard pattern expansion have the corresponding "vsetvl".
+;; We exploit each GCC internal optimization pass to optimize the "vsetvl".
+;; 2. Correct the VL/VTYPE status for each GCC standard pattern RVV instructions.
+;; Insert vsetvl for each RVV instructions that has no VL/VTYPE status if necessary.
+;; For example: RVV intrinsics.
+;; 3. Optimize "vsetvl" instructions.
+
(define_insn "@vsetvl<mode>"
- [(set (match_operand:P 0 "register_operand" "=r,r")
- (unspec:P [(match_operand:P 1 "csr_operand" "r,K")
- (match_operand 2 "const_int_operand" "i,i")
- (match_operand 3 "const_int_operand" "i,i")
- (match_operand 4 "const_int_operand" "i,i")
- (match_operand 5 "const_int_operand" "i,i")] UNSPEC_VSETVL))
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec:P [(match_operand:P 1 "csr_operand" "rK")
+ (match_operand 2 "const_int_operand" "i")
+ (match_operand 3 "const_int_operand" "i")
+ (match_operand 4 "const_int_operand" "i")
+ (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL))
(set (reg:SI VL_REGNUM)
(unspec:SI [(match_dup 1)
(match_dup 2)
@@ -70,3 +216,124 @@
"vset%i1vli\t%0,%1,e%2,%m3,t%p4,m%p5"
[(set_attr "type" "vsetvl")
(set_attr "mode" "<MODE>")])
+
+;; We keep it as no side effects before reload_completed.
+;; In this case, we can gain benefits from different GCC
+;; internal PASS such as cprop, fwprop, combine,...etc.
+
+;; Then recover it for "insert-vsetvl" and "sched2" PASS
+;; in order to get correct codegen.
+(define_insn_and_split "@vsetvl<mode>_no_side_effects"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec:P [(match_operand:P 1 "csr_operand" "rK")
+ (match_operand 2 "const_int_operand" "i")
+ (match_operand 3 "const_int_operand" "i")
+ (match_operand 4 "const_int_operand" "i")
+ (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL))]
+ "TARGET_VECTOR"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 0)
+ (unspec:P [(match_dup 1) (match_dup 2) (match_dup 3)
+ (match_dup 4) (match_dup 5)] UNSPEC_VSETVL))
+ (set (reg:SI VL_REGNUM)
+ (unspec:SI [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_VSETVL))
+ (set (reg:SI VTYPE_REGNUM)
+ (unspec:SI [(match_dup 2) (match_dup 3) (match_dup 4)
+ (match_dup 5)] UNSPEC_VSETVL))])]
+ ""
+ [(set_attr "type" "vsetvl")
+ (set_attr "mode" "<MODE>")])
+
+;; RVV machine description matching format
+;; (define_insn ""
+;; [(set (match_operand:MODE 0)
+;; (if_then_else:MODE
+;; (unspec:<MODE:VM>
+;; [(match_operand:<VM> 1 "vector_mask_operand")
+;; (match_operand N + 4 "vector_length_operand")
+;; (match_operand N + 5 "const_int_operand")
+;; (match_operand N + 6 "const_int_operand")
+;; (reg:SI VL_REGNUM)
+;; (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+;; (instruction operation:MODE
+;; (match_operand 3
+;; (match_operand 4
+;; (match_operand 5
+;; ................
+;; (match_operand N + 3)
+;; (match_operand:MODE 2 "vector_reg_or_const0_operand")))]
+;;
+;; (unspec:[........] UNSPEC_VPREDICATE) is a predicate wrapper.
+;; Include mask predicate && length predicate && vector policy.
+
+;; -------------------------------------------------------------------------------
+;; ---- Predicated Mov
+;; -------------------------------------------------------------------------------
+;; Includes:
+;; - 7.4. Vector Unit-Stride Instructions
+;; - 11.16 Vector Integer Move Instructions
+;; - 13.16 Vector Floating-Point Move Instruction
+;; - 15.1 Vector Mask-Register Logical Instructions
+;; -------------------------------------------------------------------------------
+
+;; vle.v/vse.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f.
+;; For vle.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f, we may need merge and mask operand.
+;; For vse.v, we don't need merge operand, so it should always match "vu".
+;; constraint alternative 0 ~ 1 match vle.v.
+;; constraint alternative 2 match vse.v.
+;; constraint alternative 3 match vmv.v.v.
+;; constraint alternative 4 match vmv.v.i.
+;; For vmv.v.i, we allow 2 following cases:
+;; 1. (const_vector:VNx1QI repeat [
+;; (const_int:QI N)]), -15 <= N < 16.
+;; 2. (const_vector:VNx1SF repeat [
+;; (const_double:SF 0.0 [0x0.0p+0])]).
+(define_insn "@pred_mov<mode>"
+ [(set (match_operand:V 0 "nonimmediate_operand" "=vd, vr, m, vr, vr")
+ (if_then_else:V
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1, vmWc1, vmWc1, Wc1")
+ (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 5 "const_int_operand" " i, i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operand:V 3 "vector_move_operand" " m, m, vr, vr, viWc0")
+ (match_operand:V 2 "vector_merge_operand" " 0, vu, 0, vu0, vu0")))]
+ "TARGET_VECTOR"
+ "@
+ vle<sew>.v\t%0,%3%p1
+ vle<sew>.v\t%0,%3%p1
+ vse<sew>.v\t%3,%0%p1
+ vmv.v.v\t%0,%3
+ vmv.v.i\t%0,v%3"
+ [(set_attr "type" "vlde,vlde,vste,vimov,vimov")
+ (set_attr "mode" "<MODE>")])
+
+;; vlm.v/vsm.v/vmclr.m/vmset.m.
+;; constraint alternative 0 match vlm.v.
+;; constraint alternative 1 match vsm.v.
+;; constraint alternative 2 match vmclr.m.
+;; constraint alternative 3 match vmset.m.
+(define_insn "@pred_mov<mode>"
+ [(set (match_operand:VB 0 "nonimmediate_operand" "=vr, m, vr, vr")
+ (if_then_else:VB
+ (unspec:VB
+ [(match_operand:VB 1 "vector_mask_operand" "Wc1, Wc1, Wc1, Wc1")
+ (match_operand 4 "vector_length_operand" " rK, rK, rK, rK")
+ (match_operand 5 "const_int_operand" " i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operand:VB 3 "vector_move_operand" " m, vr, Wc0, Wc1")
+ (match_operand:VB 2 "vector_merge_operand" " vu, 0, vu, vu")))]
+ "TARGET_VECTOR"
+ "@
+ vlm.v\t%0,%3
+ vsm.v\t%3,%0
+ vmclr.m\t%0
+ vmset.m\t%0"
+ [(set_attr "type" "vldm,vstm,vmalu,vmalu")
+ (set_attr "mode" "<MODE>")])