;; Machine description for RISC-V 'V' Extension for GNU compiler.
;; Copyright (C) 2022-2022 Free Software Foundation, Inc.
;; Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
;; This file is part of GCC.
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; This file describes the RISC-V 'V' Extension, Version 1.0.
;;
;; This file includes:
;;
;; - Intrinsics (https://github.com/riscv/rvv-intrinsic-doc)
;; - Auto-vectorization (TBD)
;; - Combine optimization (TBD)
(include "vector-iterators.md")
(define_c_enum "unspec" [
UNSPEC_VSETVL
UNSPEC_VUNDEF
UNSPEC_VPREDICATE
UNSPEC_VLMAX
])
(define_constants [
(INVALID_ATTRIBUTE 255)
])
;; True if the type is an RVV instruction that includes the VTYPE
;; global status register in its use op list.
;; We know VTYPE has 4 fields: SEW, LMUL, TA, MA.
;; This attribute is true for any instruction that needs one of
;; the VTYPE fields to be set.
(define_attr "has_vtype_op" "false,true"
(cond [(eq_attr "type" "vlde,vste,vldm,vstm,vlds,vsts,\
vldux,vldox,vstux,vstox,vldff,\
vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,\
vimul,vidiv,viwmul,vimuladd,viwmuladd,vimerge,vimov,\
vsalu,vaalu,vsmul,vsshift,vnclip,\
vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,\
vfcmp,vfsgnj,vfclass,vfmerge,vfmov,\
vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,\
vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,\
vired,viwred,vfred,vfredo,vfwred,vfwredo,\
vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,vfmovvf,vfmovfv,\
vislide,vislide1,vfslide1,vgather,vcompress")
(const_string "true")]
(const_string "false")))
;; True if the type is an RVV instruction that includes the VL
;; global status register in its use op list.
;; This attribute is true for any instruction that needs the
;; vector length to be specified.
(define_attr "has_vl_op" "false,true"
(cond [(eq_attr "type" "vlde,vste,vldm,vstm,vlds,vsts,\
vldux,vldox,vstux,vstox,vldff,\
vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,\
vimul,vidiv,viwmul,vimuladd,viwmuladd,vimerge,vimov,\
vsalu,vaalu,vsmul,vsshift,vnclip,\
vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,\
vfcmp,vfsgnj,vfclass,vfmerge,vfmov,\
vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,\
vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,\
vired,viwred,vfred,vfredo,vfwred,vfwredo,\
vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovxv,vfmovfv,\
vislide,vislide1,vfslide1,vgather,vcompress")
(const_string "true")]
(const_string "false")))
;; The default SEW of an RVV instruction.  This attribute doesn't mean the
;; instruction necessarily requires a SEW check; for example, vlm.v only
;; requires a ratio check.  However, we need a default SEW value for the
;; vsetvl instruction since there is no field for the ratio in the vsetvl
;; instruction encoding.
(define_attr "sew" ""
(cond [(eq_attr "mode" "VNx1QI,VNx2QI,VNx4QI,VNx8QI,VNx16QI,VNx32QI,VNx64QI,\
VNx1BI,VNx2BI,VNx4BI,VNx8BI,VNx16BI,VNx32BI,VNx64BI")
(const_int 8)
(eq_attr "mode" "VNx1HI,VNx2HI,VNx4HI,VNx8HI,VNx16HI,VNx32HI")
(const_int 16)
(eq_attr "mode" "VNx1SI,VNx2SI,VNx4SI,VNx8SI,VNx16SI,\
VNx1SF,VNx2SF,VNx4SF,VNx8SF,VNx16SF")
(const_int 32)
(eq_attr "mode" "VNx1DI,VNx2DI,VNx4DI,VNx8DI,\
VNx1DF,VNx2DF,VNx4DF,VNx8DF")
(const_int 64)
(eq_attr "type" "vsetvl")
(if_then_else (match_test "INSN_CODE (curr_insn) == CODE_FOR_vsetvldi
|| INSN_CODE (curr_insn) == CODE_FOR_vsetvlsi")
(symbol_ref "INTVAL (operands[2])")
(const_int INVALID_ATTRIBUTE))]
(const_int INVALID_ATTRIBUTE)))
;; Ditto to LMUL.
(define_attr "vlmul" ""
(cond [(eq_attr "mode" "VNx1QI,VNx1BI")
(symbol_ref "riscv_vector::get_vlmul(E_VNx1QImode)")
(eq_attr "mode" "VNx2QI,VNx2BI")
(symbol_ref "riscv_vector::get_vlmul(E_VNx2QImode)")
(eq_attr "mode" "VNx4QI,VNx4BI")
(symbol_ref "riscv_vector::get_vlmul(E_VNx4QImode)")
(eq_attr "mode" "VNx8QI,VNx8BI")
(symbol_ref "riscv_vector::get_vlmul(E_VNx8QImode)")
(eq_attr "mode" "VNx16QI,VNx16BI")
(symbol_ref "riscv_vector::get_vlmul(E_VNx16QImode)")
(eq_attr "mode" "VNx32QI,VNx32BI")
(symbol_ref "riscv_vector::get_vlmul(E_VNx32QImode)")
(eq_attr "mode" "VNx64QI,VNx64BI")
(symbol_ref "riscv_vector::get_vlmul(E_VNx64QImode)")
(eq_attr "mode" "VNx1HI")
(symbol_ref "riscv_vector::get_vlmul(E_VNx1HImode)")
(eq_attr "mode" "VNx2HI")
(symbol_ref "riscv_vector::get_vlmul(E_VNx2HImode)")
(eq_attr "mode" "VNx4HI")
(symbol_ref "riscv_vector::get_vlmul(E_VNx4HImode)")
(eq_attr "mode" "VNx8HI")
(symbol_ref "riscv_vector::get_vlmul(E_VNx8HImode)")
(eq_attr "mode" "VNx16HI")
(symbol_ref "riscv_vector::get_vlmul(E_VNx16HImode)")
(eq_attr "mode" "VNx32HI")
(symbol_ref "riscv_vector::get_vlmul(E_VNx32HImode)")
(eq_attr "mode" "VNx1SI,VNx1SF")
(symbol_ref "riscv_vector::get_vlmul(E_VNx1SImode)")
(eq_attr "mode" "VNx2SI,VNx2SF")
(symbol_ref "riscv_vector::get_vlmul(E_VNx2SImode)")
(eq_attr "mode" "VNx4SI,VNx4SF")
(symbol_ref "riscv_vector::get_vlmul(E_VNx4SImode)")
(eq_attr "mode" "VNx8SI,VNx8SF")
(symbol_ref "riscv_vector::get_vlmul(E_VNx8SImode)")
(eq_attr "mode" "VNx16SI,VNx16SF")
(symbol_ref "riscv_vector::get_vlmul(E_VNx16SImode)")
(eq_attr "mode" "VNx1DI,VNx1DF")
(symbol_ref "riscv_vector::get_vlmul(E_VNx1DImode)")
(eq_attr "mode" "VNx2DI,VNx2DF")
(symbol_ref "riscv_vector::get_vlmul(E_VNx2DImode)")
(eq_attr "mode" "VNx4DI,VNx4DF")
(symbol_ref "riscv_vector::get_vlmul(E_VNx4DImode)")
(eq_attr "mode" "VNx8DI,VNx8DF")
(symbol_ref "riscv_vector::get_vlmul(E_VNx8DImode)")
(eq_attr "type" "vsetvl")
(if_then_else (match_test "INSN_CODE (curr_insn) == CODE_FOR_vsetvldi
|| INSN_CODE (curr_insn) == CODE_FOR_vsetvlsi")
(symbol_ref "INTVAL (operands[3])")
(const_int INVALID_ATTRIBUTE))]
(const_int INVALID_ATTRIBUTE)))
;; It is valid for instructions that require a SEW/LMUL ratio.
(define_attr "ratio" ""
(cond [(eq_attr "type" "vimov,vfmov")
(const_int INVALID_ATTRIBUTE)
(eq_attr "mode" "VNx1QI,VNx1BI")
(symbol_ref "riscv_vector::get_ratio(E_VNx1QImode)")
(eq_attr "mode" "VNx2QI,VNx2BI")
(symbol_ref "riscv_vector::get_ratio(E_VNx2QImode)")
(eq_attr "mode" "VNx4QI,VNx4BI")
(symbol_ref "riscv_vector::get_ratio(E_VNx4QImode)")
(eq_attr "mode" "VNx8QI,VNx8BI")
(symbol_ref "riscv_vector::get_ratio(E_VNx8QImode)")
(eq_attr "mode" "VNx16QI,VNx16BI")
(symbol_ref "riscv_vector::get_ratio(E_VNx16QImode)")
(eq_attr "mode" "VNx32QI,VNx32BI")
(symbol_ref "riscv_vector::get_ratio(E_VNx32QImode)")
(eq_attr "mode" "VNx64QI,VNx64BI")
(symbol_ref "riscv_vector::get_ratio(E_VNx64QImode)")
(eq_attr "mode" "VNx1HI")
(symbol_ref "riscv_vector::get_ratio(E_VNx1HImode)")
(eq_attr "mode" "VNx2HI")
(symbol_ref "riscv_vector::get_ratio(E_VNx2HImode)")
(eq_attr "mode" "VNx4HI")
(symbol_ref "riscv_vector::get_ratio(E_VNx4HImode)")
(eq_attr "mode" "VNx8HI")
(symbol_ref "riscv_vector::get_ratio(E_VNx8HImode)")
(eq_attr "mode" "VNx16HI")
(symbol_ref "riscv_vector::get_ratio(E_VNx16HImode)")
(eq_attr "mode" "VNx32HI")
(symbol_ref "riscv_vector::get_ratio(E_VNx32HImode)")
(eq_attr "mode" "VNx1SI,VNx1SF")
(symbol_ref "riscv_vector::get_ratio(E_VNx1SImode)")
(eq_attr "mode" "VNx2SI,VNx2SF")
(symbol_ref "riscv_vector::get_ratio(E_VNx2SImode)")
(eq_attr "mode" "VNx4SI,VNx4SF")
(symbol_ref "riscv_vector::get_ratio(E_VNx4SImode)")
(eq_attr "mode" "VNx8SI,VNx8SF")
(symbol_ref "riscv_vector::get_ratio(E_VNx8SImode)")
(eq_attr "mode" "VNx16SI,VNx16SF")
(symbol_ref "riscv_vector::get_ratio(E_VNx16SImode)")
(eq_attr "mode" "VNx1DI,VNx1DF")
(symbol_ref "riscv_vector::get_ratio(E_VNx1DImode)")
(eq_attr "mode" "VNx2DI,VNx2DF")
(symbol_ref "riscv_vector::get_ratio(E_VNx2DImode)")
(eq_attr "mode" "VNx4DI,VNx4DF")
(symbol_ref "riscv_vector::get_ratio(E_VNx4DImode)")
(eq_attr "mode" "VNx8DI,VNx8DF")
(symbol_ref "riscv_vector::get_ratio(E_VNx8DImode)")]
(const_int INVALID_ATTRIBUTE)))
;; The index of operand[] to get the merge op.
(define_attr "merge_op_idx" ""
(cond [(eq_attr "type" "vlde,vste,vimov,vfmov,vldm,vstm,vlds,vmalu")
(const_int 2)]
(const_int INVALID_ATTRIBUTE)))
;; The index of operand[] to get the avl op.
(define_attr "vl_op_idx" ""
(cond [(eq_attr "type" "vlde,vste,vimov,vfmov,vldm,vstm,vlds,vmalu")
(const_int 4)]
(const_int INVALID_ATTRIBUTE)))
;; The index of operand[] to get the tail policy op.
(define_attr "tail_policy_op_idx" ""
(cond [(eq_attr "type" "vlde,vste,vimov,vfmov,vlds")
(const_int 5)]
(const_int INVALID_ATTRIBUTE)))
;; The index of operand[] to get the mask policy op.
(define_attr "mask_policy_op_idx" ""
(cond [(eq_attr "type" "vlde,vste,vlds")
(const_int 6)]
(const_int INVALID_ATTRIBUTE)))
;; The index of operand[] to get the avl type op.
(define_attr "avl_type_op_idx" ""
(cond [(eq_attr "type" "vlde,vlde,vste,vimov,vimov,vimov,vfmov,vlds,vlds")
(const_int 7)
(eq_attr "type" "vldm,vstm,vimov,vmalu,vmalu")
(const_int 5)]
(const_int INVALID_ATTRIBUTE)))
;; The tail policy op value.
(define_attr "ta" ""
(cond [(eq_attr "type" "vlde,vimov,vfmov,vlds")
(symbol_ref "riscv_vector::get_ta(operands[5])")]
(const_int INVALID_ATTRIBUTE)))
;; The mask policy op value.
(define_attr "ma" ""
(cond [(eq_attr "type" "vlde,vlds")
(symbol_ref "riscv_vector::get_ma(operands[6])")]
(const_int INVALID_ATTRIBUTE)))
;; The avl type value.
(define_attr "avl_type" ""
(cond [(eq_attr "type" "vlde,vlde,vste,vimov,vimov,vimov,vfmov,vlds,vlds")
(symbol_ref "INTVAL (operands[7])")
(eq_attr "type" "vldm,vstm,vimov,vmalu,vmalu")
(symbol_ref "INTVAL (operands[5])")]
(const_int INVALID_ATTRIBUTE)))
;; -----------------------------------------------------------------
;; ---- Miscellaneous Operations
;; -----------------------------------------------------------------
(define_insn "vundefined"
[(set (match_operand:V 0 "register_operand" "=vr")
(unspec:V [(const_int 0)] UNSPEC_VUNDEF))]
"TARGET_VECTOR"
"")
;; This pattern is used to hold the AVL operand for
;; RVV instructions that implicitly use VLMAX AVL.
;; Any RVV instruction that implicitly uses a GPR ultimately defined
;; by this pattern is safe for the VSETVL pass, which emits a vsetvl
;; instruction that modifies this register after RA.
;; Case 1:
;; vlmax_avl a5
;; ... (across many blocks)
;; vadd (implicit use a5) ====> emit: vsetvl a5,zero
;; Case 2:
;; vlmax_avl a5
;; ... (across many blocks)
;; mv a6,a5
;; ... (across many blocks)
;; vadd (implicit use a6) ====> emit: vsetvl a6,zero
;; Case 3:
;; vlmax_avl a5
;; ... (across many blocks)
;; store mem,a5 (spill)
;; ... (across many blocks)
;; load a7,mem (spill)
;; ... (across many blocks)
;; vadd (implicit use a7) ====> emit: vsetvl a7,zero
;; All such cases are safe for the VSETVL pass to emit a vsetvl
;; instruction that modifies the AVL operand.
(define_insn "@vlmax_avl"
[(set (match_operand:P 0 "register_operand" "=r")
(unspec:P [(match_operand:P 1 "const_int_operand" "i")] UNSPEC_VLMAX))]
"TARGET_VECTOR"
"")
;; -----------------------------------------------------------------
;; ---- Moves Operations
;; -----------------------------------------------------------------
(define_expand "mov"
[(set (match_operand:V 0 "reg_or_mem_operand")
(match_operand:V 1 "vector_move_operand"))]
"TARGET_VECTOR"
{
/* For a whole register move, we transform the pattern into a format
that excludes the clobber of the scratch register.
We include the clobber of a scalar scratch register that is going to be
used to emit the vsetvl instruction after reload_completed, since we
need a vsetvl instruction to set the VL/VTYPE global status for fractional
vector load/store.
For example:
[(set (match_operand:VNx1QI v24)
(match_operand:VNx1QI (mem: a4)))
(clobber (scratch:SI a5))]
====>> vsetvl a5,zero,e8,mf8
====>> vle8.v v24,(a4)
Philosophy:
- Clobber a scalar scratch register for each mov<mode>.
- Classify the machine_mode mode = <MODE>mode into 2 classes:
whole register move and fractional register move.
- Transform and remove the scratch clobber register for whole
register moves so that we can avoid occupying scalar
registers.
- We cannot leave it to TARGET_SECONDARY_RELOAD since that happens
before spilling.  The clobber scratch is used when spilling fractional
registers in IRA/LRA, so it's too early. */
if (riscv_vector::legitimize_move (operands[0], operands[1], <VM>mode))
DONE;
})
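;; A hedged C illustration of the fractional path through "mov<mode>"
;; (assuming <riscv_vector.h> provides the mf8 type used here):
;; void copy (int8_t *in, int8_t *out)
;; {
;;   vint8mf8_t v = *(vint8mf8_t*)in;  /* fractional LMUL = 1/8 load  */
;;   *(vint8mf8_t*)out = v;            /* fractional LMUL = 1/8 store */
;; }
;; Both statements expand through "mov<mode>"; since the mode is fractional,
;; riscv_vector::legitimize_move handles them instead of the whole-register
;; patterns below, so a vsetvl can set VL/VTYPE for the access.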
;; This pattern is used for code-gen of whole register load/stores.
;; It is also applicable to all register moves.
;; Fractional vector mode load/stores are not allowed to match this pattern.
;; Mask mode load/stores are not allowed to match this pattern.
;; We separate "*mov<mode>" into "*mov<mode>_whole" and "*mov<mode>_fract"
;; because we don't want to include fractional load/stores in "*mov<mode>",
;; which would create unexpected patterns in LRA.
;; For example:
;; ira rtl:
;; (insn 20 19 9 2 (set (reg/v:VNx2QI 97 v1 [ v1 ])
;; (reg:VNx2QI 134 [ _1 ])) "rvv.c":9:22 571 {*movvnx2qi_fract}
;; (nil))
;; When LRA discovers that the value of pseudo register 134 in the insn above
;; has already been spilled to memory, it will reload this pattern into a
;; memory load instruction pattern.
;; Because VNx2QI is a fractional vector, we want LRA to reload this pattern into:
;; (insn 20 19 9 2 (parallel [
;; (set (reg:VNx2QI 98 v2 [orig:134 _1 ] [134])
;; (mem/c:VNx2QI (reg:SI 13 a3 [155]) [1 %sfp+[-2, -2] S[2, 2] A8]))
;; (clobber (reg:SI 14 a4 [149]))])
;; so that we are able to emit a vsetvl instruction using the clobbered scratch a4.
;; To let LRA generate the expected pattern, we should exclude fractional vector
;; load/stores from "*mov<mode>_whole".  Otherwise, it will reload this pattern into:
;; (insn 20 19 9 2 (set (reg:VNx2QI 98 v2 [orig:134 _1 ] [134])
;; (mem/c:VNx2QI (reg:SI 13 a3 [155]) [1 %sfp+[-2, -2] S[2, 2] A8])))
;; which is not the pattern we want.
;; Based on the facts above, we make "*mov<mode>_whole" include load/store/move for
;; whole vector modes (according to '-march') and "*mov<mode>_fract" include only
;; fractional vector modes.
(define_insn "*mov_whole"
[(set (match_operand:V_WHOLE 0 "reg_or_mem_operand" "=vr, m,vr")
(match_operand:V_WHOLE 1 "reg_or_mem_operand" " m,vr,vr"))]
"TARGET_VECTOR"
"@
vl%m1re.v\t%0,%1
vs%m1r.v\t%1,%0
vmv%m1r.v\t%0,%1"
[(set_attr "type" "vldr,vstr,vmov")
(set_attr "mode" "")])
(define_insn "*mov_fract"
[(set (match_operand:V_FRACT 0 "register_operand" "=vr")
(match_operand:V_FRACT 1 "register_operand" " vr"))]
"TARGET_VECTOR"
"vmv1r.v\t%0,%1"
[(set_attr "type" "vmov")
(set_attr "mode" "")])
(define_expand "mov"
[(set (match_operand:VB 0 "reg_or_mem_operand")
(match_operand:VB 1 "vector_move_operand"))]
"TARGET_VECTOR"
{
if (riscv_vector::legitimize_move (operands[0], operands[1], <MODE>mode))
DONE;
})
(define_insn "*mov"
[(set (match_operand:VB 0 "register_operand" "=vr")
(match_operand:VB 1 "register_operand" " vr"))]
"TARGET_VECTOR"
"vmv1r.v\t%0,%1"
[(set_attr "type" "vmov")
(set_attr "mode" "")])
;; -----------------------------------------------------------------
;; ---- Duplicate Operations
;; -----------------------------------------------------------------
;; According to the GCC internals manual:
;; This pattern only handles duplicates of non-constant inputs.
;; Constant vectors go through the mov<mode> pattern instead.
;; So "direct_broadcast_operand" can only be a mem or reg, never a CONSTANT.
(define_expand "vec_duplicate"
[(set (match_operand:V 0 "register_operand")
(vec_duplicate:V
(match_operand:<VEL> 1 "direct_broadcast_operand")))]
"TARGET_VECTOR"
{
riscv_vector::emit_pred_op (
code_for_pred_broadcast (<MODE>mode), operands[0], operands[1], <VM>mode);
DONE;
}
)
;; -----------------------------------------------------------------
;; ---- 6. Configuration-Setting Instructions
;; -----------------------------------------------------------------
;; Includes:
;; - 6.1 vsetvli/vsetivl/vsetvl instructions
;; -----------------------------------------------------------------
;; We don't define vsetvli as unspec_volatile, which would give it side effects.
;; This instruction can be scheduled by the instruction scheduler.
;; This means these instructions will be deleted when
;; no following instruction uses vl or vtype.
;; rd | rs1 | AVL value | Effect on vl
;; - | !x0 | x[rs1] | Normal stripmining
;; !x0 | x0 | ~0 | Set vl to VLMAX
;; operands[0]: VL.
;; operands[1]: AVL.
;; operands[2]: SEW
;; operands[3]: LMUL
;; operands[4]: Tail policy 0 or 1 (undisturbed/agnostic)
;; operands[5]: Mask policy 0 or 1 (undisturbed/agnostic)
;; We define 2 types of "vsetvl*" instruction patterns:
;; - "@vsetvl<mode>" is a parallel format that has side effects.
;; - "@vsetvl<mode>_no_side_effects" has no side effects.
;; - "@vsetvl<mode>" is used by the "vsetvl" intrinsics and the "insert-vsetvl" PASS.
;; - "@vsetvl<mode>_no_side_effects" is used by GCC standard patterns.
;; - "@vsetvl<mode>" includes the VL/VTYPE global registers status (define set),
;; and each RVV instruction includes the VL/VTYPE global registers status (use),
;; so that we can guarantee each RVV instruction executes with the correct
;; VL/VTYPE global registers status after the "insert-vsetvl" PASS.
;; - "@vsetvl<mode>_no_side_effects" has no side effects and excludes the VL/VTYPE
;; global registers status (define set).  It's only used by GCC standard pattern
;; expansion, for example the "mov<mode>" pattern for fractional vector modes which
;; needs to set VL/VTYPE.  We can then manually call this pattern to gain benefits
;; from the optimizations of each GCC internal PASS.
;; 1. void foo (float *in, float *out)
;; {
;; vfloat32mf2_t v = *(vfloat32mf2_t*)in;
;; *(vfloat32mf2_t*)out = v;
;; }
;; We could eliminate the second "vsetvl" by calling "@vsetvl<mode>_no_side_effects".
;;
;; "@vsetvl": ;; "@vsetvl_no_side_effects":
;; vsetvli a4,zero,e32,mf2,ta,ma ;; vsetvli a4,zero,e32,mf2,ta,ma
;; vle32.v v24,(a0) ;; vle32.v v24,(a0)
;; vsetvli a4,zero,e32,mf2,ta,ma ;; --
;; vse32.v v24,(a1) ;; vse32.v v24,(a1)
;; ret ;; ret
;; 2. void foo (int8_t *in, int8_t *out, int M)
;; {
;; for (int i = 0; i < M; i++){
;; vint8mf2_t v = *(vint8mf2_t*)(in + i);
;; *(vint8mf2_t*)(out + i) = v;
;; }
;; }
;;
;; Hoist "vsetvl" instruction in LICM:
;; "@vsetvl": ;; "@vsetvl_no_side_effects":
;; - ;; vsetvli a4,zero,e32,mf2,ta,ma
;; LOOP: ;; LOOP:
;; vsetvli a4,zero,e32,mf2,ta,ma ;; -
;; vle32.v v24,(a0) ;; vle32.v v24,(a0)
;; vsetvli a4,zero,e32,mf2,ta,ma ;; -
;; vse32.v v24,(a1) ;; vse32.v v24,(a1)
;; However, it may produce wrong codegen if we exclude VL/VTYPE in "vsetvl".
;; 3. void foo (int8_t *in, int8_t *out, int32_t *in2, int32_t *out2, int M)
;; {
;; for (int i = 0; i < M; i++){
;; vint8mf2_t v = *(vint8mf2_t*)(in + i);
;; vint32mf2_t v2 = *(vint32mf2_t*)(in + i + i);
;; *(vint8mf2_t*)(out + i) = v;
;; *(vint32mf2_t*)(out + i + i) = v2;
;; }
;; }
;;
;; vsetvli a6,zero,e8,mf2,ta,ma
;; vsetvli a2,zero,e32,mf2,ta,ma
;; LOOP:
;; vle8.v v25,(a0)
;; vle32.v v24,(a5)
;; addi a0,a0,1
;; vse8.v v25,(a1)
;; vse32.v v24,(a3)
;;
;; Both vle8.v and vle32.v are using the wrong VL/VTYPE status.
;; We leave it to "insert-vsetvl" PASS to correct this situation.
;; The "insert-vsetvl" PASS mechanism:
;; 1. Before "insert-vsetvl" PASS, only RVV instructions are generated
;; by GCC standard pattern expansion has the corresponding "vsetvl".
;; We exploit each GCC internal optimization pass to optimize the "vsetvl".
;; 2. Correct the VL/VTYPE status for each GCC standard pattern RVV instructions.
;; Insert vsetvl for each RVV instructions that has no VL/VTYPE status if necessary.
;; For example: RVV intrinsics.
;; 3. Optimize "vsetvl" instructions.
(define_insn "@vsetvl"
[(set (match_operand:P 0 "register_operand" "=r")
(unspec:P [(match_operand:P 1 "csr_operand" "rK")
(match_operand 2 "const_int_operand" "i")
(match_operand 3 "const_int_operand" "i")
(match_operand 4 "const_int_operand" "i")
(match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL))
(set (reg:SI VL_REGNUM)
(unspec:SI [(match_dup 1)
(match_dup 2)
(match_dup 3)] UNSPEC_VSETVL))
(set (reg:SI VTYPE_REGNUM)
(unspec:SI [(match_dup 2)
(match_dup 3)
(match_dup 4)
(match_dup 5)] UNSPEC_VSETVL))]
"TARGET_VECTOR"
"vset%i1vli\t%0,%1,e%2,%m3,t%p4,m%p5"
[(set_attr "type" "vsetvl")
(set_attr "mode" "")])
;; vsetvl zero,zero,vtype instruction.
;; This pattern has no side effects and does not set the X0 register.
(define_insn "vsetvl_vtype_change_only"
[(set (reg:SI VTYPE_REGNUM)
(unspec:SI
[(match_operand 0 "const_int_operand" "i")
(match_operand 1 "const_int_operand" "i")
(match_operand 2 "const_int_operand" "i")
(match_operand 3 "const_int_operand" "i")] UNSPEC_VSETVL))]
"TARGET_VECTOR"
"vsetvli\tzero,zero,e%0,%m1,t%p2,m%p3"
[(set_attr "type" "vsetvl")
(set_attr "mode" "SI")])
;; vsetvl zero,rs1,vtype instruction.
;; The reason we need this pattern is that we should avoid setting the X0
;; register in the vsetvl instruction pattern.
(define_insn "@vsetvl_discard_result"
[(set (reg:SI VL_REGNUM)
(unspec:SI [(match_operand:P 0 "csr_operand" "rK")
(match_operand 1 "const_int_operand" "i")
(match_operand 2 "const_int_operand" "i")] UNSPEC_VSETVL))
(set (reg:SI VTYPE_REGNUM)
(unspec:SI [(match_dup 1)
(match_dup 2)
(match_operand 3 "const_int_operand" "i")
(match_operand 4 "const_int_operand" "i")] UNSPEC_VSETVL))]
"TARGET_VECTOR"
"vsetvli\tzero,%0,e%1,%m2,t%p3,m%p4"
[(set_attr "type" "vsetvl")
(set_attr "mode" "")])
;; It's emitted by the vsetvl/vsetvlmax intrinsics with no side effects.
;; Since we have many optimization passes from "expand" to "reload_completed",
;; such a pattern allows us to gain the benefits of these optimizations.
(define_insn_and_split "@vsetvl_no_side_effects"
[(set (match_operand:P 0 "register_operand" "=r")
(unspec:P [(match_operand:P 1 "csr_operand" "rK")
(match_operand 2 "const_int_operand" "i")
(match_operand 3 "const_int_operand" "i")
(match_operand 4 "const_int_operand" "i")
(match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL))]
"TARGET_VECTOR"
"#"
"&& epilogue_completed"
[(parallel
[(set (match_dup 0)
(unspec:P [(match_dup 1) (match_dup 2) (match_dup 3)
(match_dup 4) (match_dup 5)] UNSPEC_VSETVL))
(set (reg:SI VL_REGNUM)
(unspec:SI [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_VSETVL))
(set (reg:SI VTYPE_REGNUM)
(unspec:SI [(match_dup 2) (match_dup 3) (match_dup 4)
(match_dup 5)] UNSPEC_VSETVL))])]
""
[(set_attr "type" "vsetvl")
(set_attr "mode" "SI")])
;; RVV machine description matching format
;; (define_insn ""
;; [(set (match_operand:MODE 0)
;; (if_then_else:MODE
;; (unspec:<VM>
;; [(match_operand:<VM> 1 "vector_mask_operand")
;; (match_operand N + 4 "vector_length_operand")
;; (match_operand N + 5 "const_int_operand")
;; (match_operand N + 6 "const_int_operand")
;; (reg:SI VL_REGNUM)
;; (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
;; (instruction operation:MODE
;; (match_operand 3
;; (match_operand 4
;; (match_operand 5
;; ................
;; (match_operand N + 3)
;; (match_operand:MODE 2 "vector_reg_or_const0_operand")))]
;;
;; (unspec:[........] UNSPEC_VPREDICATE) is a predicate wrapper.
;; Include mask predicate && length predicate && vector policy.
;; -------------------------------------------------------------------------------
;; ---- Predicated Mov
;; -------------------------------------------------------------------------------
;; Includes:
;; - 7.4. Vector Unit-Stride Instructions
;; - 11.16 Vector Integer Move Instructions
;; - 13.16 Vector Floating-Point Move Instruction
;; - 15.1 Vector Mask-Register Logical Instructions
;; -------------------------------------------------------------------------------
;; vle.v/vse.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f.
;; For vle.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f, we may need merge and mask operands.
;; For vse.v, we don't need a merge operand, so it should always match "vu".
;; constraint alternatives 0 and 1 match vle.v.
;; constraint alternative 2 matches vse.v.
;; constraint alternative 3 matches vmv.v.v.
;; constraint alternative 4 matches vmv.v.i.
;; For vmv.v.i, we allow 2 following cases:
;; 1. (const_vector:VNx1QI repeat [
;; (const_int:QI N)]), -15 <= N < 16.
;; 2. (const_vector:VNx1SF repeat [
;; (const_double:SF 0.0 [0x0.0p+0])]).
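;; A hedged C illustration of case 1 (intrinsic spelling assumed from the
;; rvv-intrinsic-doc): splatting a small literal can be represented as the
;; const_vector form above, in which case it matches the vmv.v.i alternative:
;; vint8m1_t splat5 (size_t vl)
;; {
;;   return vmv_v_x_i8m1 (5, vl);   /* may be emitted as vmv.v.i */
;; }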
(define_insn_and_split "@pred_mov"
[(set (match_operand:V 0 "nonimmediate_operand" "=vd, vr, m, vr, vr")
(if_then_else:V
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" "vmWc1, vmWc1, vmWc1, Wc1, Wc1")
(match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK")
(match_operand 5 "const_int_operand" " i, i, i, i, i")
(match_operand 6 "const_int_operand" " i, i, i, i, i")
(match_operand 7 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operand:V 3 "vector_move_operand" " m, m, vr, vr, viWc0")
(match_operand:V 2 "vector_merge_operand" " 0, vu, vu0, vu0, vu0")))]
"TARGET_VECTOR"
"@
vle<sew>.v\t%0,%3%p1
vle<sew>.v\t%0,%3%p1
vse<sew>.v\t%3,%0%p1
vmv.v.v\t%0,%3
vmv.v.i\t%0,%v3"
"&& register_operand (operands[0], mode)
&& register_operand (operands[3], mode)
&& satisfies_constraint_vu (operands[2])"
[(set (match_dup 0) (match_dup 3))]
""
[(set_attr "type" "vlde,vlde,vste,vimov,vimov")
(set_attr "mode" "")])
;; vlm.v/vsm.v/vmclr.m/vmset.m.
;; constraint alternative 0 matches vlm.v.
;; constraint alternative 1 matches vsm.v.
;; constraint alternative 3 matches vmclr.m.
;; constraint alternative 4 matches vmset.m.
(define_insn_and_split "@pred_mov"
[(set (match_operand:VB 0 "nonimmediate_operand" "=vr, m, vr, vr, vr")
(if_then_else:VB
(unspec:VB
[(match_operand:VB 1 "vector_mask_operand" "Wc1, Wc1, Wc1, Wc1, Wc1")
(match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK")
(match_operand 5 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operand:VB 3 "vector_move_operand" " m, vr, vr, Wc0, Wc1")
(match_operand:VB 2 "vector_merge_operand" " vu, vu0, vu, vu, vu")))]
"TARGET_VECTOR"
"@
vlm.v\t%0,%3
vsm.v\t%3,%0
#
vmclr.m\t%0
vmset.m\t%0"
"&& register_operand (operands[0], mode)
&& register_operand (operands[3], mode)"
[(set (match_dup 0) (match_dup 3))]
""
[(set_attr "type" "vldm,vstm,vimov,vmalu,vmalu")
(set_attr "mode" "")])
;; -------------------------------------------------------------------------------
;; ---- Predicated Broadcast
;; -------------------------------------------------------------------------------
;; Includes:
;; - 7.5. Vector Strided Instructions (zero stride)
;; - 11.16 Vector Integer Move Instructions (vmv.v.x)
;; - 13.16 Vector Floating-Point Move Instruction (vfmv.v.f)
;; -------------------------------------------------------------------------------
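;; A hedged C sketch of the broadcast this pattern implements (assuming
;; auto-vectorization or intrinsic lowering reaches it): a loop-invariant
;; scalar is splat across a vector; whether it is first moved into a GPR/FPR
;; (vmv.v.x / vfmv.v.f) or kept in memory and loaded with a zero-stride
;; vlse depends on which alternative the operand matches:
;; void splat_mem (int32_t *out, const int32_t *x, int n)
;; {
;;   for (int i = 0; i < n; i++)
;;     out[i] = *x;   /* broadcast of a loop-invariant scalar */
;; }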
(define_insn "@pred_broadcast"
[(set (match_operand:V 0 "register_operand" "=vr, vr, vr, vr")
(if_then_else:V
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " Wc1, Wc1, vm, Wc1")
(match_operand 4 "vector_length_operand" " rK, rK, rK, rK")
(match_operand 5 "const_int_operand" " i, i, i, i")
(match_operand 6 "const_int_operand" " i, i, i, i")
(match_operand 7 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(vec_duplicate:V
(match_operand:<VEL> 3 "direct_broadcast_operand" " r, f, Wdm, Wdm"))
(match_operand:V 2 "vector_merge_operand" "vu0, vu0, vu0, vu0")))]
"TARGET_VECTOR"
"@
vmv.v.x\t%0,%3
vfmv.v.f\t%0,%3
vlse<sew>.v\t%0,%3,zero,%1.t
vlse<sew>.v\t%0,%3,zero"
[(set_attr "type" "vimov,vfmov,vlds,vlds")
(set_attr "mode" "")])