;; Machine description for RISC-V 'V' Extension for GNU compiler.
;; Copyright (C) 2022-2022 Free Software Foundation, Inc.
;; Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.

;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; This file describes the RISC-V 'V' Extension, Version 1.0.
;;
;; This file includes:
;;
;; - Intrinsics (https://github.com/riscv/rvv-intrinsic-doc)
;; - Auto-vectorization (TBD)
;; - Combine optimization (TBD)

(include "vector-iterators.md")

(define_c_enum "unspec" [
  UNSPEC_VSETVL
  UNSPEC_VUNDEF
  UNSPEC_VPREDICATE
  UNSPEC_VLMAX
])

(define_constants [
  (INVALID_ATTRIBUTE	255)
])

;; True if the type is an RVV instruction that includes the VTYPE
;; global status register in its use op list.
;; We know VTYPE has 4 fields: SEW, LMUL, TA, MA.
;; An instruction that needs any of the VTYPE fields has this
;; attribute set to true.
(define_attr "has_vtype_op" "false,true"
  (cond [(eq_attr "type" "vlde,vste,vldm,vstm,vlds,vsts,\
			  vldux,vldox,vstux,vstox,vldff,\
			  vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,\
			  vimul,vidiv,viwmul,vimuladd,viwmuladd,vimerge,vimov,\
			  vsalu,vaalu,vsmul,vsshift,vnclip,\
			  vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,\
			  vfcmp,vfsgnj,vfclass,vfmerge,vfmov,\
			  vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,\
			  vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,\
			  vired,viwred,vfred,vfredo,vfwred,vfwredo,\
			  vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,vfmovvf,vfmovfv,\
			  vislide,vislide1,vfslide1,vgather,vcompress")
	 (const_string "true")]
	(const_string "false")))

;; True if the type is an RVV instruction that includes the VL
;; global status register in its use op list.
;; An instruction that needs the vector length to be specified has
;; this attribute set to true.
(define_attr "has_vl_op" "false,true"
  (cond [(eq_attr "type" "vlde,vste,vldm,vstm,vlds,vsts,\
			  vldux,vldox,vstux,vstox,vldff,\
			  vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,\
			  vimul,vidiv,viwmul,vimuladd,viwmuladd,vimerge,vimov,\
			  vsalu,vaalu,vsmul,vsshift,vnclip,\
			  vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,\
			  vfcmp,vfsgnj,vfclass,vfmerge,vfmov,\
			  vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,\
			  vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,\
			  vired,viwred,vfred,vfredo,vfwred,vfwredo,\
			  vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovxv,vfmovfv,\
			  vislide,vislide1,vfslide1,vgather,vcompress")
	 (const_string "true")]
	(const_string "false")))

;; The default SEW of an RVV instruction.  This attribute doesn't mean the
;; instruction always requires an SEW check; vlm.v, for example, requires a
;; ratio check instead.  However, we need a default value of SEW for the
;; vsetvl instruction since there is no field for the ratio in the vsetvl
;; instruction encoding.
(define_attr "sew" ""
  (cond [(eq_attr "mode" "VNx1QI,VNx2QI,VNx4QI,VNx8QI,VNx16QI,VNx32QI,VNx64QI,\
			  VNx1BI,VNx2BI,VNx4BI,VNx8BI,VNx16BI,VNx32BI,VNx64BI")
	 (const_int 8)
	 (eq_attr "mode" "VNx1HI,VNx2HI,VNx4HI,VNx8HI,VNx16HI,VNx32HI")
	 (const_int 16)
	 (eq_attr "mode" "VNx1SI,VNx2SI,VNx4SI,VNx8SI,VNx16SI,\
			  VNx1SF,VNx2SF,VNx4SF,VNx8SF,VNx16SF")
	 (const_int 32)
	 (eq_attr "mode" "VNx1DI,VNx2DI,VNx4DI,VNx8DI,\
			  VNx1DF,VNx2DF,VNx4DF,VNx8DF")
	 (const_int 64)
	 (eq_attr "type" "vsetvl")
	   (if_then_else (match_test "INSN_CODE (curr_insn) == CODE_FOR_vsetvldi
				      || INSN_CODE (curr_insn) == CODE_FOR_vsetvlsi")
	     (symbol_ref "INTVAL (operands[2])")
	     (const_int INVALID_ATTRIBUTE))]
	(const_int INVALID_ATTRIBUTE)))

;; Ditto for LMUL.
(define_attr "vlmul" ""
  (cond [(eq_attr "mode" "VNx1QI,VNx1BI")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx1QImode)")
	 (eq_attr "mode" "VNx2QI,VNx2BI")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx2QImode)")
	 (eq_attr "mode" "VNx4QI,VNx4BI")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx4QImode)")
	 (eq_attr "mode" "VNx8QI,VNx8BI")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx8QImode)")
	 (eq_attr "mode" "VNx16QI,VNx16BI")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx16QImode)")
	 (eq_attr "mode" "VNx32QI,VNx32BI")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx32QImode)")
	 (eq_attr "mode" "VNx64QI,VNx64BI")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx64QImode)")
	 (eq_attr "mode" "VNx1HI")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx1HImode)")
	 (eq_attr "mode" "VNx2HI")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx2HImode)")
	 (eq_attr "mode" "VNx4HI")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx4HImode)")
	 (eq_attr "mode" "VNx8HI")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx8HImode)")
	 (eq_attr "mode" "VNx16HI")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx16HImode)")
	 (eq_attr "mode" "VNx32HI")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx32HImode)")
	 (eq_attr "mode" "VNx1SI,VNx1SF")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx1SImode)")
	 (eq_attr "mode" "VNx2SI,VNx2SF")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx2SImode)")
	 (eq_attr "mode" "VNx4SI,VNx4SF")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx4SImode)")
	 (eq_attr "mode" "VNx8SI,VNx8SF")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx8SImode)")
	 (eq_attr "mode" "VNx16SI,VNx16SF")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx16SImode)")
	 (eq_attr "mode" "VNx1DI,VNx1DF")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx1DImode)")
	 (eq_attr "mode" "VNx2DI,VNx2DF")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx2DImode)")
	 (eq_attr "mode" "VNx4DI,VNx4DF")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx4DImode)")
	 (eq_attr "mode" "VNx8DI,VNx8DF")
	 (symbol_ref "riscv_vector::get_vlmul(E_VNx8DImode)")
	 (eq_attr "type" "vsetvl")
	   (if_then_else (match_test "INSN_CODE (curr_insn) == CODE_FOR_vsetvldi
				      || INSN_CODE (curr_insn) == CODE_FOR_vsetvlsi")
	     (symbol_ref "INTVAL (operands[3])")
	     (const_int INVALID_ATTRIBUTE))]
	(const_int INVALID_ATTRIBUTE)))
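;; As an illustration of the two attributes above (this example is not taken
;; from the original comments; the register choices are arbitrary): an
;; instruction operating on VNx4SImode has sew == 32 by the table above and
;; takes its vlmul from riscv_vector::get_vlmul, while a vsetvl instruction
;; such as
;;   vsetvli a5,a0,e32,m1,ta,ma
;; carries SEW and LMUL as immediates in operands[2] and operands[3], which
;; the vsetvl arms of the conds above read via INTVAL.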
;; The SEW/LMUL ratio.  It is valid for instructions that require a
;; SEW/LMUL ratio check.
(define_attr "ratio" ""
  (cond [(eq_attr "type" "vimov,vfmov")
	 (const_int INVALID_ATTRIBUTE)
	 (eq_attr "mode" "VNx1QI,VNx1BI")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx1QImode)")
	 (eq_attr "mode" "VNx2QI,VNx2BI")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx2QImode)")
	 (eq_attr "mode" "VNx4QI,VNx4BI")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx4QImode)")
	 (eq_attr "mode" "VNx8QI,VNx8BI")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx8QImode)")
	 (eq_attr "mode" "VNx16QI,VNx16BI")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx16QImode)")
	 (eq_attr "mode" "VNx32QI,VNx32BI")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx32QImode)")
	 (eq_attr "mode" "VNx64QI,VNx64BI")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx64QImode)")
	 (eq_attr "mode" "VNx1HI")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx1HImode)")
	 (eq_attr "mode" "VNx2HI")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx2HImode)")
	 (eq_attr "mode" "VNx4HI")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx4HImode)")
	 (eq_attr "mode" "VNx8HI")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx8HImode)")
	 (eq_attr "mode" "VNx16HI")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx16HImode)")
	 (eq_attr "mode" "VNx32HI")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx32HImode)")
	 (eq_attr "mode" "VNx1SI,VNx1SF")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx1SImode)")
	 (eq_attr "mode" "VNx2SI,VNx2SF")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx2SImode)")
	 (eq_attr "mode" "VNx4SI,VNx4SF")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx4SImode)")
	 (eq_attr "mode" "VNx8SI,VNx8SF")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx8SImode)")
	 (eq_attr "mode" "VNx16SI,VNx16SF")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx16SImode)")
	 (eq_attr "mode" "VNx1DI,VNx1DF")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx1DImode)")
	 (eq_attr "mode" "VNx2DI,VNx2DF")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx2DImode)")
	 (eq_attr "mode" "VNx4DI,VNx4DF")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx4DImode)")
	 (eq_attr "mode" "VNx8DI,VNx8DF")
	 (symbol_ref "riscv_vector::get_ratio(E_VNx8DImode)")]
	(const_int INVALID_ATTRIBUTE)))

;; The index of operand[] to get the merge op.
(define_attr "merge_op_idx" ""
  (cond [(eq_attr "type" "vlde,vste,vimov,vfmov,vldm,vstm,vlds,vmalu")
	 (const_int 2)]
	(const_int INVALID_ATTRIBUTE)))

;; The index of operand[] to get the avl op.
(define_attr "vl_op_idx" ""
  (cond [(eq_attr "type" "vlde,vste,vimov,vfmov,vldm,vstm,vlds,vmalu")
	 (const_int 4)]
	(const_int INVALID_ATTRIBUTE)))

;; The index of operand[] to get the tail policy op.
(define_attr "tail_policy_op_idx" ""
  (cond [(eq_attr "type" "vlde,vste,vimov,vfmov,vlds")
	 (const_int 5)]
	(const_int INVALID_ATTRIBUTE)))

;; The index of operand[] to get the mask policy op.
(define_attr "mask_policy_op_idx" ""
  (cond [(eq_attr "type" "vlde,vste,vlds")
	 (const_int 6)]
	(const_int INVALID_ATTRIBUTE)))

;; The index of operand[] to get the avl type op.
(define_attr "avl_type_op_idx" ""
  (cond [(eq_attr "type" "vlde,vlde,vste,vimov,vimov,vimov,vfmov,vlds,vlds")
	 (const_int 7)
	 (eq_attr "type" "vldm,vstm,vimov,vmalu,vmalu")
	 (const_int 5)]
	(const_int INVALID_ATTRIBUTE)))

;; The tail policy op value.
(define_attr "ta" ""
  (cond [(eq_attr "type" "vlde,vimov,vfmov,vlds")
	 (symbol_ref "riscv_vector::get_ta(operands[5])")]
	(const_int INVALID_ATTRIBUTE)))

;; The mask policy op value.
(define_attr "ma" ""
  (cond [(eq_attr "type" "vlde,vlds")
	 (symbol_ref "riscv_vector::get_ma(operands[6])")]
	(const_int INVALID_ATTRIBUTE)))
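;; As an illustration of how these indices line up (this example is not part
;; of the original comments), for a vlde-type instruction matching the
;; "@pred_mov<mode>" pattern defined further down in this file:
;;   operands[2] is the merge op        (merge_op_idx == 2),
;;   operands[4] is the AVL op          (vl_op_idx == 4),
;;   operands[5] is the tail policy op  (tail_policy_op_idx == 5),
;;   operands[6] is the mask policy op  (mask_policy_op_idx == 6),
;;   operands[7] is the avl type op     (avl_type_op_idx == 7).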
;; The avl type value.
(define_attr "avl_type" ""
  (cond [(eq_attr "type" "vlde,vlde,vste,vimov,vimov,vimov,vfmov,vlds,vlds")
	 (symbol_ref "INTVAL (operands[7])")
	 (eq_attr "type" "vldm,vstm,vimov,vmalu,vmalu")
	 (symbol_ref "INTVAL (operands[5])")]
	(const_int INVALID_ATTRIBUTE)))

;; -----------------------------------------------------------------
;; ---- Miscellaneous Operations
;; -----------------------------------------------------------------

(define_insn "vundefined<mode>"
  [(set (match_operand:V 0 "register_operand" "=vr")
	(unspec:V [(const_int 0)] UNSPEC_VUNDEF))]
  "TARGET_VECTOR"
  "")

;; This pattern is used to hold the AVL operand for
;; RVV instructions that implicitly use the VLMAX AVL.
;; A GPR that is ultimately defined by this pattern and implicitly used
;; by an RVV instruction is safe for the VSETVL pass to emit a vsetvl
;; instruction that modifies this register after RA.
;; Case 1:
;;   vlmax_avl a5
;;   ... (across many blocks)
;;   vadd (implicit use a5)   ====> emit: vsetvl a5,zero
;; Case 2:
;;   vlmax_avl a5
;;   ... (across many blocks)
;;   mv a6,a5
;;   ... (across many blocks)
;;   vadd (implicit use a6)   ====> emit: vsetvl a6,zero
;; Case 3:
;;   vlmax_avl a5
;;   ... (across many blocks)
;;   store mem,a5 (spill)
;;   ... (across many blocks)
;;   load a7,mem (spill)
;;   ... (across many blocks)
;;   vadd (implicit use a7)   ====> emit: vsetvl a7,zero
;; Such cases are all safe for the VSETVL PASS to emit a vsetvl
;; instruction that modifies the AVL operand.
(define_insn "@vlmax_avl<mode>"
  [(set (match_operand:P 0 "register_operand" "=r")
	(unspec:P [(match_operand:P 1 "const_int_operand" "i")] UNSPEC_VLMAX))]
  "TARGET_VECTOR"
  "")

;; -----------------------------------------------------------------
;; ---- Moves Operations
;; -----------------------------------------------------------------

(define_expand "mov<mode>"
  [(set (match_operand:V 0 "reg_or_mem_operand")
	(match_operand:V 1 "vector_move_operand"))]
  "TARGET_VECTOR"
{
  /* For a whole register move, we transform the pattern into a format
     that excludes the clobber of a scratch register.

     We include the clobber of a scalar scratch register, which is going
     to be used to emit a vsetvl instruction after reload_completed, since
     we need a vsetvl instruction to set the VL/VTYPE global status for
     fractional vector load/stores.

     For example:
       [(set (match_operand:VNx1QI v24)
	     (match_operand:VNx1QI (mem: a4)))
	(clobber (scratch:SI a5))]
     ====>> vsetvl a5,zero,e8,mf8
     ====>> vle8.v v24,(a4)

     Philosophy:

       - Clobber a scalar scratch register for each mov<mode>.

       - Classify the machine_mode mode = <MODE>mode into 2 classes:
	 whole register move and fractional register move.

       - Transform and remove the scratch clobber register for whole
	 register moves so that we can avoid occupying the scalar registers.

       - We can not leave it to TARGET_SECONDARY_RELOAD since it happens
	 before spilling.  The clobbered scratch is used by the spilling of
	 fractional registers in IRA/LRA, so it's too early.  */

  if (riscv_vector::legitimize_move (operands[0], operands[1], <VM>mode))
    DONE;
})
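;; A minimal illustration of the two classes (this example is not taken from
;; the original comments; the types assume the RVV intrinsic typedefs and the
;; register choices are arbitrary):
;;
;;   void copy_whole (vint8m1_t *in, vint8m1_t *out) { *out = *in; }
;;     =>  vl1re8.v v24,(a0)          whole register load, no vsetvl needed
;;         vs1r.v   v24,(a1)
;;
;;   void copy_fract (vint8mf8_t *in, vint8mf8_t *out) { *out = *in; }
;;     =>  vsetvli  a5,zero,e8,mf8    a5 comes from the clobbered scratch
;;         vle8.v   v24,(a0)
;;         vse8.v   v24,(a1)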
;; This pattern is used for code-gen of whole register load/stores.
;; It is also applicable to all register moves.
;; Fractional vector mode load/stores are not allowed to match this pattern.
;; Mask mode load/stores are not allowed to match this pattern.
;; We separate "*mov<mode>" into "*mov<mode>_whole" and "*mov<mode>_fract"
;; because we don't want to include fractional load/stores in "*mov<mode>",
;; which would create unexpected patterns in LRA.
;; For example:
;; ira rtl:
;;   (insn 20 19 9 2 (set (reg/v:VNx2QI 97 v1 [ v1 ])
;;	  (reg:VNx2QI 134 [ _1 ])) "rvv.c":9:22 571 {*movvnx2qi_fract}
;;	(nil))
;; When the value of pseudo register 134 of the insn above is discovered
;; already spilled in memory during LRA, LRA will reload this pattern into
;; a memory load instruction pattern.
;; Because VNx2QI is a fractional vector, we want LRA to reload this pattern
;; into
;;  (insn 20 19 9 2 (parallel [
;;	 (set (reg:VNx2QI 98 v2 [orig:134 _1 ] [134])
;;	   (mem/c:VNx2QI (reg:SI 13 a3 [155]) [1 %sfp+[-2, -2] S[2, 2] A8]))
;;	 (clobber (reg:SI 14 a4 [149]))])
;; so that we are able to emit a vsetvl instruction using the clobbered
;; scratch register a4.
;; To let LRA generate the expected pattern, we should exclude fractional
;; vector load/stores in "*mov<mode>_whole".  Otherwise, it will reload this
;; pattern into:
;;  (insn 20 19 9 2 (set (reg:VNx2QI 98 v2 [orig:134 _1 ] [134])
;;	   (mem/c:VNx2QI (reg:SI 13 a3 [155]) [1 %sfp+[-2, -2] S[2, 2] A8])))
;; which is not the pattern we want.
;; According to the facts above, "*mov<mode>_whole" includes load/store/move
;; for the whole vector modes according to '-march' and "*mov<mode>_fract"
;; only includes fractional vector modes.
(define_insn "*mov<mode>_whole"
  [(set (match_operand:V_WHOLE 0 "reg_or_mem_operand" "=vr, m,vr")
	(match_operand:V_WHOLE 1 "reg_or_mem_operand" "  m,vr,vr"))]
  "TARGET_VECTOR"
  "@
   vl%m1re<sew>.v\t%0,%1
   vs%m1r.v\t%1,%0
   vmv%m1r.v\t%0,%1"
  [(set_attr "type" "vldr,vstr,vmov")
   (set_attr "mode" "<MODE>")])

(define_insn "*mov<mode>_fract"
  [(set (match_operand:V_FRACT 0 "register_operand" "=vr")
	(match_operand:V_FRACT 1 "register_operand" " vr"))]
  "TARGET_VECTOR"
  "vmv1r.v\t%0,%1"
  [(set_attr "type" "vmov")
   (set_attr "mode" "<MODE>")])

(define_expand "mov<mode>"
  [(set (match_operand:VB 0 "reg_or_mem_operand")
	(match_operand:VB 1 "vector_move_operand"))]
  "TARGET_VECTOR"
{
  if (riscv_vector::legitimize_move (operands[0], operands[1], <MODE>mode))
    DONE;
})

(define_insn "*mov<mode>"
  [(set (match_operand:VB 0 "register_operand" "=vr")
	(match_operand:VB 1 "register_operand" " vr"))]
  "TARGET_VECTOR"
  "vmv1r.v\t%0,%1"
  [(set_attr "type" "vmov")
   (set_attr "mode" "<MODE>")])

;; -----------------------------------------------------------------
;; ---- Duplicate Operations
;; -----------------------------------------------------------------

;; According to GCC internals:
;; This pattern only handles duplicates of non-constant inputs.
;; Constant vectors go through the movm pattern instead.
;; So "direct_broadcast_operand" can only be a mem or reg, no CONSTANT.
(define_expand "vec_duplicate<mode>"
  [(set (match_operand:V 0 "register_operand")
	(vec_duplicate:V
	  (match_operand:<VEL> 1 "direct_broadcast_operand")))]
  "TARGET_VECTOR"
  {
    riscv_vector::emit_pred_op (
      code_for_pred_broadcast (<MODE>mode), operands[0], operands[1], <VM>mode);
    DONE;
  }
)

;; -----------------------------------------------------------------
;; ---- 6. Configuration-Setting Instructions
;; -----------------------------------------------------------------
;; Includes:
;;  - 6.1 vsetvli/vsetivli/vsetvl instructions
;; -----------------------------------------------------------------

;; We don't define vsetvli as unspec_volatile, which has side effects.
;; This instruction can be scheduled by the instruction scheduler.
;; This means these instructions will be deleted when no following
;; instruction uses vl or vtype.
;;
;;  rd  | rs1 | AVL value | Effect on vl
;;   -  | !x0 |  x[rs1]   | Normal stripmining
;;  !x0 |  x0 |    ~0     | Set vl to VLMAX
;; operands[0]: VL.
;; operands[1]: AVL.
;; operands[2]: SEW
;; operands[3]: LMUL
;; operands[4]: Tail policy 0 or 1 (undisturbed/agnostic)
;; operands[5]: Mask policy 0 or 1 (undisturbed/agnostic)

;; We define 2 types of "vsetvl*" instruction patterns:
;; - "@vsetvl<mode>" is a parallel format which has side effects.
;; - "@vsetvl<mode>_no_side_effects" has no side effects.
;; - "@vsetvl<mode>" is used by the "vsetvl" intrinsics and the
;;   "insert-vsetvl" PASS.
;; - "@vsetvl<mode>_no_side_effects" is used by GCC standard patterns.
;; - "@vsetvl<mode>" includes the VL/VTYPE global registers status
;;   (define set), and each RVV instruction includes the VL/VTYPE global
;;   registers status (use), so that we can guarantee each RVV instruction
;;   executes with the correct VL/VTYPE global registers status after the
;;   "insert-vsetvl" PASS.
;; - "@vsetvl<mode>_no_side_effects" has no side effects and excludes the
;;   VL/VTYPE global registers status (define set).  It's only used by GCC
;;   standard pattern expansion, for example the "mov<mode>" pattern for
;;   fractional vector modes which needs to set VL/VTYPE.  We can then
;;   manually call this pattern to gain the benefits of the optimizations
;;   of each GCC internal PASS.

;; 1. void foo (float *in, float *out)
;;    {
;;      vfloat32mf2_t v = *(vfloat32mf2_t*)in;
;;      *(vfloat32mf2_t*)out = v;
;;    }
;; We can eliminate the second "vsetvl" by calling
;; "@vsetvl<mode>_no_side_effects".
;;
;; "@vsetvl<mode>":                       "@vsetvl<mode>_no_side_effects":
;;   vsetvli a4,zero,e32,mf2,ta,ma          vsetvli a4,zero,e32,mf2,ta,ma
;;   vle32.v v24,(a0)                       vle32.v v24,(a0)
;;   vsetvli a4,zero,e32,mf2,ta,ma          --
;;   vse32.v v24,(a1)                       vse32.v v24,(a1)
;;   ret                                    ret

;; 2. void foo (int8_t *in, int8_t *out, int M)
;;    {
;;      for (int i = 0; i < M; i++){
;;        vint8mf2_t v = *(vint8mf2_t*)(in + i);
;;        *(vint8mf2_t*)(out + i) = v;
;;      }
;;    }
;;
;; Hoist the "vsetvl" instruction out of the loop in LICM:
;; "@vsetvl<mode>":                       "@vsetvl<mode>_no_side_effects":
;;   -                                      vsetvli a4,zero,e8,mf2,ta,ma
;; LOOP:                                  LOOP:
;;   vsetvli a4,zero,e8,mf2,ta,ma           -
;;   vle8.v  v24,(a0)                       vle8.v  v24,(a0)
;;   vsetvli a4,zero,e8,mf2,ta,ma           -
;;   vse8.v  v24,(a1)                       vse8.v  v24,(a1)

;; However, excluding VL/VTYPE from "vsetvl" may produce wrong codegen.
;; 3. void foo (int8_t *in, int8_t *out, int32_t *in2, int32_t *out2, int M)
;;    {
;;      for (int i = 0; i < M; i++){
;;        vint8mf2_t v = *(vint8mf2_t*)(in + i);
;;        vint32mf2_t v2 = *(vint32mf2_t*)(in + i + i);
;;        *(vint8mf2_t*)(out + i) = v;
;;        *(vint32mf2_t*)(out + i + i) = v2;
;;      }
;;    }
;;
;;   vsetvli a6,zero,e8,mf2,ta,ma
;;   vsetvli a2,zero,e32,mf2,ta,ma
;; LOOP:
;;   vle8.v  v25,(a0)
;;   vle32.v v24,(a5)
;;   addi    a0,a0,1
;;   vse8.v  v25,(a1)
;;   vse32.v v24,(a3)
;;
;; Both vle8.v and vle32.v are using the wrong VL/VTYPE status.
;; We leave it to the "insert-vsetvl" PASS to correct this situation.

;; The "insert-vsetvl" PASS mechanism:
;; 1. Before the "insert-vsetvl" PASS, only the RVV instructions generated
;;    by GCC standard pattern expansion have a corresponding "vsetvl".
;;    We exploit each GCC internal optimization pass to optimize the
;;    "vsetvl".
;; 2. Correct the VL/VTYPE status for each RVV instruction generated by a
;;    GCC standard pattern.  Insert a vsetvl for each RVV instruction that
;;    has no VL/VTYPE status if necessary, e.g. RVV intrinsics.
;; 3. Optimize "vsetvl" instructions.
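;; For illustration only (this hypothetical sequence is not taken from real
;; compiler output): for an RVV instruction that carries no VL/VTYPE status,
;; such as one emitted for an intrinsic, the PASS turns
;;   vle8.v  v24,(a0)
;; into
;;   vsetvli zero,a4,e8,mf2,ta,ma
;;   vle8.v  v24,(a0)
;; and afterwards deletes any "vsetvl" that an earlier compatible "vsetvl"
;; already makes redundant.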
(define_insn "@vsetvl<mode>"
  [(set (match_operand:P 0 "register_operand" "=r")
	(unspec:P [(match_operand:P 1 "csr_operand" "rK")
		   (match_operand 2 "const_int_operand" "i")
		   (match_operand 3 "const_int_operand" "i")
		   (match_operand 4 "const_int_operand" "i")
		   (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL))
   (set (reg:SI VL_REGNUM)
	(unspec:SI [(match_dup 1)
		    (match_dup 2)
		    (match_dup 3)] UNSPEC_VSETVL))
   (set (reg:SI VTYPE_REGNUM)
	(unspec:SI [(match_dup 2)
		    (match_dup 3)
		    (match_dup 4)
		    (match_dup 5)] UNSPEC_VSETVL))]
  "TARGET_VECTOR"
  "vset%i1vli\t%0,%1,e%2,%m3,t%p4,m%p5"
  [(set_attr "type" "vsetvl")
   (set_attr "mode" "<MODE>")])

;; vsetvl zero,zero,vtype instruction.
;; This pattern has no side effects and does not set the X0 register.
(define_insn "vsetvl_vtype_change_only"
  [(set (reg:SI VTYPE_REGNUM)
	(unspec:SI
	  [(match_operand 0 "const_int_operand" "i")
	   (match_operand 1 "const_int_operand" "i")
	   (match_operand 2 "const_int_operand" "i")
	   (match_operand 3 "const_int_operand" "i")] UNSPEC_VSETVL))]
  "TARGET_VECTOR"
  "vsetvli\tzero,zero,e%0,%m1,t%p2,m%p3"
  [(set_attr "type" "vsetvl")
   (set_attr "mode" "SI")])

;; vsetvl zero,rs1,vtype instruction.
;; We need this pattern because we must avoid setting the X0 register
;; in the vsetvl instruction pattern.
(define_insn "@vsetvl_discard_result<mode>"
  [(set (reg:SI VL_REGNUM)
	(unspec:SI [(match_operand:P 0 "csr_operand" "rK")
		    (match_operand 1 "const_int_operand" "i")
		    (match_operand 2 "const_int_operand" "i")] UNSPEC_VSETVL))
   (set (reg:SI VTYPE_REGNUM)
	(unspec:SI [(match_dup 1)
		    (match_dup 2)
		    (match_operand 3 "const_int_operand" "i")
		    (match_operand 4 "const_int_operand" "i")] UNSPEC_VSETVL))]
  "TARGET_VECTOR"
  "vsetvli\tzero,%0,e%1,%m2,t%p3,m%p4"
  [(set_attr "type" "vsetvl")
   (set_attr "mode" "<MODE>")])

;; It's emitted by the vsetvl/vsetvlmax intrinsics with no side effects.
;; Since we have many optimization passes from "expand" to "reload_completed",
;; such a pattern allows us to gain the benefits of these optimizations.
(define_insn_and_split "@vsetvl<mode>_no_side_effects"
  [(set (match_operand:P 0 "register_operand" "=r")
	(unspec:P [(match_operand:P 1 "csr_operand" "rK")
		   (match_operand 2 "const_int_operand" "i")
		   (match_operand 3 "const_int_operand" "i")
		   (match_operand 4 "const_int_operand" "i")
		   (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL))]
  "TARGET_VECTOR"
  "#"
  "&& epilogue_completed"
  [(parallel
    [(set (match_dup 0)
	  (unspec:P [(match_dup 1) (match_dup 2) (match_dup 3)
		     (match_dup 4) (match_dup 5)] UNSPEC_VSETVL))
     (set (reg:SI VL_REGNUM)
	  (unspec:SI [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_VSETVL))
     (set (reg:SI VTYPE_REGNUM)
	  (unspec:SI [(match_dup 2) (match_dup 3) (match_dup 4)
		      (match_dup 5)] UNSPEC_VSETVL))])]
  ""
  [(set_attr "type" "vsetvl")
   (set_attr "mode" "SI")])

;; RVV machine description matching format:
;; (define_insn ""
;;   [(set (match_operand:MODE 0)
;;	(if_then_else:MODE
;;	  (unspec:<VM>
;;	    [(match_operand:<VM> 1 "vector_mask_operand")
;;	     (match_operand N + 4 "vector_length_operand")
;;	     (match_operand N + 5 "const_int_operand")
;;	     (match_operand N + 6 "const_int_operand")
;;	     (reg:SI VL_REGNUM)
;;	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
;;	  (instruction operation:MODE
;;	     (match_operand 3)
;;	     (match_operand 4)
;;	     (match_operand 5)
;;	     ................
;;	     (match_operand N + 3))
;;	  (match_operand:MODE 2 "vector_reg_or_const0_operand")))]
;;
;; (unspec:[........] UNSPEC_VPREDICATE) is a predicate wrapper.
;; It includes the mask predicate && the length predicate && the vector policy.
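;; As a concrete instance of this format (a simplified copy of the
;; "@pred_mov<mode>" pattern defined below, with constraints elided):
;;   [(set (match_operand:V 0 ...)
;;	(if_then_else:V
;;	  (unspec:<VM>
;;	    [(match_operand:<VM> 1 "vector_mask_operand")   ; mask
;;	     (match_operand 4 "vector_length_operand")      ; AVL
;;	     (match_operand 5 "const_int_operand")          ; tail policy
;;	     (match_operand 6 "const_int_operand")          ; mask policy
;;	     (match_operand 7 "const_int_operand")          ; avl type
;;	     (reg:SI VL_REGNUM)
;;	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
;;	  (match_operand:V 3 "vector_move_operand")         ; the source value
;;	  (match_operand:V 2 "vector_merge_operand")))]     ; the merge operand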
;; -------------------------------------------------------------------------------
;; ---- Predicated Mov
;; -------------------------------------------------------------------------------
;; Includes:
;; - 7.4. Vector Unit-Stride Instructions
;; - 11.16 Vector Integer Move Instructions
;; - 13.16 Vector Floating-Point Move Instruction
;; - 15.1 Vector Mask-Register Logical Instructions
;; -------------------------------------------------------------------------------

;; vle.v/vse.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f.
;; For vle.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f, we may need the merge and
;; mask operands.
;; For vse.v, we don't need a merge operand, so it should always match "vu".
;; constraint alternatives 0 ~ 1 match vle.v.
;; constraint alternative 2 matches vse.v.
;; constraint alternative 3 matches vmv.v.v.
;; constraint alternative 4 matches vmv.v.i.
;; For vmv.v.i, we allow the 2 following cases:
;;    1. (const_vector:VNx1QI repeat [
;;		(const_int:QI N)]), -16 <= N <= 15.
;;    2. (const_vector:VNx1SF repeat [
;;		(const_double:SF 0.0 [0x0.0p+0])]).
(define_insn_and_split "@pred_mov<mode>"
  [(set (match_operand:V 0 "nonimmediate_operand"	  "=vd,    vr,     m,    vr,    vr")
	(if_then_else:V
	  (unspec:<VM>
	    [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1, vmWc1, vmWc1,   Wc1,   Wc1")
	     (match_operand 4 "vector_length_operand"	  "   rK,    rK,    rK,    rK,    rK")
	     (match_operand 5 "const_int_operand"	  "    i,     i,     i,     i,     i")
	     (match_operand 6 "const_int_operand"	  "    i,     i,     i,     i,     i")
	     (match_operand 7 "const_int_operand"	  "    i,     i,     i,     i,     i")
	     (reg:SI VL_REGNUM)
	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
	  (match_operand:V 3 "vector_move_operand"	  "    m,     m,    vr,    vr, viWc0")
	  (match_operand:V 2 "vector_merge_operand"	  "    0,    vu,   vu0,   vu0,   vu0")))]
  "TARGET_VECTOR"
  "@
   vle<sew>.v\t%0,%3%p1
   vle<sew>.v\t%0,%3%p1
   vse<sew>.v\t%3,%0%p1
   vmv.v.v\t%0,%3
   vmv.v.i\t%0,%v3"
  "&& register_operand (operands[0], <MODE>mode)
   && register_operand (operands[3], <MODE>mode)
   && satisfies_constraint_vu (operands[2])"
  [(set (match_dup 0) (match_dup 3))]
  ""
  [(set_attr "type" "vlde,vlde,vste,vimov,vimov")
   (set_attr "mode" "<MODE>")])

;; vlm.v/vsm.v/vmclr.m/vmset.m.
;; constraint alternative 0 matches vlm.v.
;; constraint alternative 1 matches vsm.v.
;; constraint alternative 3 matches vmclr.m.
;; constraint alternative 4 matches vmset.m.
(define_insn_and_split "@pred_mov<mode>"
  [(set (match_operand:VB 0 "nonimmediate_operand"	  "=vr,   m,  vr,  vr,  vr")
	(if_then_else:VB
	  (unspec:VB
	    [(match_operand:VB 1 "vector_mask_operand"	  "Wc1, Wc1, Wc1, Wc1, Wc1")
	     (match_operand 4 "vector_length_operand"	  " rK,  rK,  rK,  rK,  rK")
	     (match_operand 5 "const_int_operand"	  "  i,   i,   i,   i,   i")
	     (reg:SI VL_REGNUM)
	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
	  (match_operand:VB 3 "vector_move_operand"	  "  m,  vr,  vr, Wc0, Wc1")
	  (match_operand:VB 2 "vector_merge_operand"	  " vu, vu0,  vu,  vu,  vu")))]
  "TARGET_VECTOR"
  "@
   vlm.v\t%0,%3
   vsm.v\t%3,%0
   #
   vmclr.m\t%0
   vmset.m\t%0"
  "&& register_operand (operands[0], <MODE>mode)
   && register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (match_dup 3))]
  ""
  [(set_attr "type" "vldm,vstm,vimov,vmalu,vmalu")
   (set_attr "mode" "<MODE>")])
;; -------------------------------------------------------------------------------
;; ---- Predicated Broadcast
;; -------------------------------------------------------------------------------
;; Includes:
;; - 7.5. Vector Strided Instructions (zero stride)
;; - 11.16 Vector Integer Move Instructions (vmv.v.x)
;; - 13.16 Vector Floating-Point Move Instruction (vfmv.v.f)
;; -------------------------------------------------------------------------------

(define_insn "@pred_broadcast<mode>"
  [(set (match_operand:V 0 "register_operand"		      "=vr,  vr,  vr,  vr")
	(if_then_else:V
	  (unspec:<VM>
	    [(match_operand:<VM> 1 "vector_mask_operand"      " Wc1, Wc1,  vm, Wc1")
	     (match_operand 4 "vector_length_operand"	      "  rK,  rK,  rK,  rK")
	     (match_operand 5 "const_int_operand"	      "   i,   i,   i,   i")
	     (match_operand 6 "const_int_operand"	      "   i,   i,   i,   i")
	     (match_operand 7 "const_int_operand"	      "   i,   i,   i,   i")
	     (reg:SI VL_REGNUM)
	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
	  (vec_duplicate:V
	    (match_operand:<VEL> 3 "direct_broadcast_operand" "   r,   f, Wdm, Wdm"))
	  (match_operand:V 2 "vector_merge_operand"	      " vu0, vu0, vu0, vu0")))]
  "TARGET_VECTOR"
  "@
   vmv.v.x\t%0,%3
   vfmv.v.f\t%0,%3
   vlse<sew>.v\t%0,%3,zero,%1.t
   vlse<sew>.v\t%0,%3,zero"
  [(set_attr "type" "vimov,vfmov,vlds,vlds")
   (set_attr "mode" "<MODE>")])
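;; A small illustration of the alternatives (this example is not from the
;; original comments; the C snippet assumes the rvv-intrinsic-doc API and the
;; register choices are arbitrary):
;;
;;   vint32m1_t broadcast (int32_t x, size_t vl) {
;;     return vmv_v_x_i32m1 (x, vl);
;;   }
;;     =>  vsetvli zero,a1,e32,m1,ta,ma
;;         vmv.v.x v24,a0                  alternative 0: scalar GPR source
;;
;;   A scalar that lives in memory can instead match the "Wdm" alternatives
;;   and be broadcast with a zero-stride load, e.g.
;;         vlse32.v v24,(a0),zero          alternative 3, unmasked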