;; Machine description for RISC-V 'V' Extension for GNU compiler.
;; Copyright (C) 2022-2022 Free Software Foundation, Inc.
;; Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
;; This file is part of GCC.
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; This file describes the RISC-V 'V' Extension, Version 1.0.
;;
;; This file includes:
;;
;; - Intrinsics (https://github.com/riscv/rvv-intrinsic-doc)
;; - Auto-vectorization (TBD)
;; - Combine optimization (TBD)
(include "vector-iterators.md")
(define_c_enum "unspec" [
UNSPEC_VSETVL
UNSPEC_VUNDEF
UNSPEC_VPREDICATE
])
;; -----------------------------------------------------------------
;; ---- Miscellaneous Operations
;; -----------------------------------------------------------------
(define_insn "vundefined"
[(set (match_operand:V 0 "register_operand" "=vr")
(unspec:V [(const_int 0)] UNSPEC_VUNDEF))]
"TARGET_VECTOR"
"")
;; -----------------------------------------------------------------
;; ---- Moves Operations
;; -----------------------------------------------------------------
(define_expand "mov"
[(set (match_operand:V 0 "reg_or_mem_operand")
(match_operand:V 1 "vector_move_operand"))]
"TARGET_VECTOR"
{
if (riscv_vector::legitimize_move (operands[0], operands[1], mode))
DONE;
})
;; This pattern is used for code-gen for whole register load/stores.
;; Also applicable for all register moves.
;; Fractional vector modes load/store are not allowed to match this pattern.
;; Mask modes load/store are not allowed to match this pattern.
(define_insn "*mov"
[(set (match_operand:V 0 "reg_or_mem_operand" "=vr,m,vr")
(match_operand:V 1 "reg_or_mem_operand" "m,vr,vr"))]
"TARGET_VECTOR && ((register_operand (operands[0], mode)
&& register_operand (operands[1], mode))
|| known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR))"
"@
vl%m1re.v\t%0,%1
vs%m1r.v\t%1,%0
vmv%m1r.v\t%0,%1"
[(set_attr "type" "vldr,vstr,vmov")
(set_attr "mode" "")])
(define_expand "mov"
[(set (match_operand:VB 0 "reg_or_mem_operand")
(match_operand:VB 1 "vector_move_operand"))]
"TARGET_VECTOR"
{
if (riscv_vector::legitimize_move (operands[0], operands[1], mode))
DONE;
})
(define_insn "*mov"
[(set (match_operand:VB 0 "register_operand" "=vr")
(match_operand:VB 1 "register_operand" "vr"))]
"TARGET_VECTOR"
"vmv1r.v\t%0,%1"
[(set_attr "type" "vmov")
(set_attr "mode" "")])
;; -----------------------------------------------------------------
;; ---- 6. Configuration-Setting Instructions
;; -----------------------------------------------------------------
;; Includes:
;; - 6.1 vsetvli/vsetivli/vsetvl instructions
;; -----------------------------------------------------------------
;; We don't define vsetvli as unspec_volatile, which would give it side
;; effects.  This way the instruction can be scheduled by the instruction
;; scheduler.  It also means a vsetvl instruction will be deleted when
;; no following instruction uses vl or vtype.
;; rd  | rs1 | AVL value | Effect on vl
;; -   | !x0 | x[rs1]    | Normal stripmining
;; !x0 | x0  | ~0        | Set vl to VLMAX
;; operands[0]: VL.
;; operands[1]: AVL.
;; operands[2]: SEW
;; operands[3]: LMUL
;; operands[4]: Tail policy 0 or 1 (undisturbed/agnostic)
;; operands[5]: Mask policy 0 or 1 (undisturbed/agnostic)
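;; For example (a hedged illustration; the register names and values are
;; hypothetical): with operands[0] = a4, operands[1] = x0, SEW = 32,
;; LMUL = 1/2, tail agnostic and mask agnostic, the pattern assembles as:
;;   vsetvli a4,zero,e32,mf2,ta,ma   (rs1 = x0: set vl to VLMAX)
;; whereas with operands[1] = a0 it would be:
;;   vsetvli a4,a0,e32,mf2,ta,ma     (rs1 = a0: normal stripmining)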
;; We define 2 types of "vsetvl*" instruction patterns:
;; - "@vsetvl<mode>" is a parallel format which has side effects.
;; - "@vsetvl<mode>_no_side_effects" has no side effects.
;; - "@vsetvl<mode>" is used by the "vsetvl" intrinsics and the "insert-vsetvl" PASS.
;; - "@vsetvl<mode>_no_side_effects" is used by GCC standard patterns.
;; - "@vsetvl<mode>" includes the VL/VTYPE global register status (define set),
;;   and each RVV instruction includes the VL/VTYPE global register status (use),
;;   so that we can guarantee each RVV instruction executes with the correct
;;   VL/VTYPE global register status after the "insert-vsetvl" PASS.
;; - "@vsetvl<mode>_no_side_effects" has no side effects and excludes the VL/VTYPE
;;   global register status (define set).  It's only used by GCC standard pattern
;;   expansion.  For example: the "mov<mode>" pattern for fractional vector modes
;;   needs to set VL/VTYPE; then we can manually call this pattern to gain benefits
;;   from the optimization of each GCC internal PASS.
;; 1. void foo (float *in, float *out)
;;    {
;;      vfloat32mf2_t v = *(vfloat32mf2_t*)in;
;;      *(vfloat32mf2_t*)out = v;
;;    }
;; We could eliminate the second "vsetvl" by calling "@vsetvl<mode>_no_side_effects".
;;
;; "@vsetvl": ;; "@vsetvl_no_side_effects":
;; vsetvli a4,zero,e32,mf2,ta,ma ;; vsetvli a4,zero,e32,mf2,ta,ma
;; vle32.v v24,(a0) ;; vle32.v v24,(a0)
;; vsetvli a4,zero,e32,mf2,ta,ma ;; --
;; vse32.v v24,(a1) ;; vse32.v v24,(a1)
;; ret ;; ret
;; 2. void foo (int8_t *in, int8_t *out, int M)
;;    {
;;      for (int i = 0; i < M; i++){
;;        vint8mf2_t v = *(vint8mf2_t*)(in + i);
;;        *(vint8mf2_t*)(out + i) = v;
;;      }
;;    }
;;
;; Hoist "vsetvl" instruction in LICM:
;; "@vsetvl": ;; "@vsetvl_no_side_effects":
;; - ;; vsetvli a4,zero,e32,mf2,ta,ma
;; LOOP: ;; LOOP:
;; vsetvli a4,zero,e32,mf2,ta,ma ;; -
;; vle32.v v24,(a0) ;; vle32.v v24,(a0)
;; vsetvli a4,zero,e32,mf2,ta,ma ;; -
;; vse32.v v24,(a1) ;; vse32.v v24,(a1)
;; However, it may produce wrong codegen if we exclude VL/VTYPE in "vsetvl".
;; 3. void foo (int8_t *in, int8_t *out, int32_t *in2, int32_t *out2, int M)
;;    {
;;      for (int i = 0; i < M; i++){
;;        vint8mf2_t v = *(vint8mf2_t*)(in + i);
;;        vint32mf2_t v2 = *(vint32mf2_t*)(in2 + i + i);
;;        *(vint8mf2_t*)(out + i) = v;
;;        *(vint32mf2_t*)(out2 + i + i) = v2;
;;      }
;;    }
;;
;; vsetvli a6,zero,e8,mf2,ta,ma
;; vsetvli a2,zero,e32,mf2,ta,ma
;; LOOP:
;; vle8.v v25,(a0)
;; vle32.v v24,(a5)
;; addi a0,a0,1
;; vse8.v v25,(a1)
;; vse32.v v24,(a3)
;;
;; Both vle8.v and vle32.v are using the wrong VL/VTYPE status.
;; We leave it to "insert-vsetvl" PASS to correct this situation.
;; The "insert-vsetvl" PASS mechanism:
;; 1. Before "insert-vsetvl" PASS, only RVV instructions are generated
;; by GCC standard pattern expansion has the corresponding "vsetvl".
;; We exploit each GCC internal optimization pass to optimize the "vsetvl".
;; 2. Correct the VL/VTYPE status for each GCC standard pattern RVV instructions.
;; Insert vsetvl for each RVV instructions that has no VL/VTYPE status if necessary.
;; For example: RVV intrinsics.
;; 3. Optimize "vsetvl" instructions.
(define_insn "@vsetvl"
[(set (match_operand:P 0 "register_operand" "=r")
(unspec:P [(match_operand:P 1 "csr_operand" "rK")
(match_operand 2 "const_int_operand" "i")
(match_operand 3 "const_int_operand" "i")
(match_operand 4 "const_int_operand" "i")
(match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL))
(set (reg:SI VL_REGNUM)
(unspec:SI [(match_dup 1)
(match_dup 2)
(match_dup 3)] UNSPEC_VSETVL))
(set (reg:SI VTYPE_REGNUM)
(unspec:SI [(match_dup 2)
(match_dup 3)
(match_dup 4)
(match_dup 5)] UNSPEC_VSETVL))]
"TARGET_VECTOR"
"vset%i1vli\t%0,%1,e%2,%m3,t%p4,m%p5"
[(set_attr "type" "vsetvl")
(set_attr "mode" "")])
;; We keep this pattern free of side effects before reload_completed.
;; In this case, we can gain benefits from different GCC internal
;; PASSes such as cprop, fwprop, combine, etc.
;; Then we recover the side effects for the "insert-vsetvl" and
;; "sched2" PASSes in order to get correct codegen.
(define_insn_and_split "@vsetvl<mode>_no_side_effects"
  [(set (match_operand:P 0 "register_operand" "=r")
	(unspec:P [(match_operand:P 1 "csr_operand" "rK")
		   (match_operand 2 "const_int_operand" "i")
		   (match_operand 3 "const_int_operand" "i")
		   (match_operand 4 "const_int_operand" "i")
		   (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL))]
  "TARGET_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel
    [(set (match_dup 0)
	  (unspec:P [(match_dup 1) (match_dup 2) (match_dup 3)
		     (match_dup 4) (match_dup 5)] UNSPEC_VSETVL))
     (set (reg:SI VL_REGNUM)
	  (unspec:SI [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_VSETVL))
     (set (reg:SI VTYPE_REGNUM)
	  (unspec:SI [(match_dup 2) (match_dup 3) (match_dup 4)
		      (match_dup 5)] UNSPEC_VSETVL))])]
  ""
  [(set_attr "type" "vsetvl")
   (set_attr "mode" "<MODE>")])
;; RVV machine description matching format
;; (define_insn ""
;;   [(set (match_operand:MODE 0)
;;      (if_then_else:MODE
;;        (unspec:<VM>
;;          [(match_operand:<VM> 1 "vector_mask_operand")
;;           (match_operand N + 4 "vector_length_operand")
;;           (match_operand N + 5 "const_int_operand")
;;           (match_operand N + 6 "const_int_operand")
;;           (reg:SI VL_REGNUM)
;;           (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
;;        (instruction operation:MODE
;;           (match_operand 3)
;;           (match_operand 4)
;;           (match_operand 5)
;;           ................
;;           (match_operand N + 3))
;;        (match_operand:MODE 2 "vector_reg_or_const0_operand")))]
;;
;; (unspec:[........] UNSPEC_VPREDICATE) is a predicate wrapper.
;; It includes the mask predicate, the length predicate, and the vector policy.
;; -------------------------------------------------------------------------------
;; ---- Predicated Mov
;; -------------------------------------------------------------------------------
;; Includes:
;; - 7.4. Vector Unit-Stride Instructions
;; - 11.16 Vector Integer Move Instructions
;; - 13.16 Vector Floating-Point Move Instruction
;; - 15.1 Vector Mask-Register Logical Instructions
;; -------------------------------------------------------------------------------
;; vle<sew>.v/vse<sew>.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f.
;; For vle<sew>.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f, we may need merge and mask operands.
;; For vse<sew>.v, we don't need a merge operand, so it should always match "vu".
;; constraint alternatives 0 ~ 1 match vle<sew>.v.
;; constraint alternative 2 matches vse<sew>.v.
;; constraint alternative 3 matches vmv.v.v.
;; constraint alternative 4 matches vmv.v.i.
;; For vmv.v.i, we allow the 2 following cases:
;;    1. (const_vector:VNx1QI repeat [
;;		(const_int:QI N)]), -15 <= N < 16.
;;    2. (const_vector:VNx1SF repeat [
;;		(const_double:SF 0.0 [0x0.0p+0])]).
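;; For example (a hedged illustration; the intrinsic name follows the
;; rvv-intrinsic-doc naming and the destination register is hypothetical):
;;   vint8mf2_t v = __riscv_vmv_v_x_i8mf2 (5, vl);
;; can match constraint alternative 4 and emit:
;;   vmv.v.i v8,5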
(define_insn "@pred_mov"
[(set (match_operand:V 0 "nonimmediate_operand" "=vd, vr, m, vr, vr")
(if_then_else:V
(unspec:
[(match_operand: 1 "vector_mask_operand" " vm, Wc1, vmWc1, vmWc1, Wc1")
(match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK")
(match_operand 5 "const_int_operand" " i, i, i, i, i")
(match_operand 6 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operand:V 3 "vector_move_operand" " m, m, vr, vr, viWc0")
(match_operand:V 2 "vector_merge_operand" " 0, vu, 0, vu0, vu0")))]
"TARGET_VECTOR"
"@
vle.v\t%0,%3%p1
vle.v\t%0,%3%p1
vse.v\t%3,%0%p1
vmv.v.v\t%0,%3
vmv.v.i\t%0,v%3"
[(set_attr "type" "vlde,vlde,vste,vimov,vimov")
(set_attr "mode" "")])
;; vlm.v/vsm.v/vmclr.m/vmset.m.
;; constraint alternative 0 matches vlm.v.
;; constraint alternative 1 matches vsm.v.
;; constraint alternative 2 matches vmclr.m.
;; constraint alternative 3 matches vmset.m.
(define_insn "@pred_mov"
[(set (match_operand:VB 0 "nonimmediate_operand" "=vr, m, vr, vr")
(if_then_else:VB
(unspec:VB
[(match_operand:VB 1 "vector_mask_operand" "Wc1, Wc1, Wc1, Wc1")
(match_operand 4 "vector_length_operand" " rK, rK, rK, rK")
(match_operand 5 "const_int_operand" " i, i, i, i")
(match_operand 6 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operand:VB 3 "vector_move_operand" " m, vr, Wc0, Wc1")
(match_operand:VB 2 "vector_merge_operand" " vu, 0, vu, vu")))]
"TARGET_VECTOR"
"@
vlm.v\t%0,%3
vsm.v\t%3,%0
vmclr.m\t%0
vmset.m\t%0"
[(set_attr "type" "vldm,vstm,vmalu,vmalu")
(set_attr "mode" "")])