diff options
author | Martin Liska <mliska@suse.cz> | 2022-10-13 15:54:17 +0200 |
---|---|---|
committer | Martin Liska <mliska@suse.cz> | 2022-10-13 15:54:17 +0200 |
commit | bd21c04269deded2c7476ceca1100a26f28ea526 (patch) | |
tree | 197bf75eedac69362078a4ccc0afe5615c45c327 /gcc/config | |
parent | d9e7934d25da4a78ffef1f738206aa1d897911df (diff) | |
parent | 786e4c024f941671a233f5779d73a5d22f4e9588 (diff) | |
download | gcc-bd21c04269deded2c7476ceca1100a26f28ea526.zip gcc-bd21c04269deded2c7476ceca1100a26f28ea526.tar.gz gcc-bd21c04269deded2c7476ceca1100a26f28ea526.tar.bz2 |
Merge branch 'master' into devel/sphinx
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/arc/arc.cc | 54 | ||||
-rw-r--r-- | gcc/config/arc/arc.md | 372 | ||||
-rw-r--r-- | gcc/config/arc/arc.opt | 10 | ||||
-rw-r--r-- | gcc/config/arc/constraints.md | 44 | ||||
-rw-r--r-- | gcc/config/gcn/gcn-modes.def | 82 | ||||
-rw-r--r-- | gcc/config/gcn/gcn-protos.h | 24 | ||||
-rw-r--r-- | gcc/config/gcn/gcn-valu.md | 399 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.cc | 1063 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.h | 24 | ||||
-rw-r--r-- | gcc/config/i386/driver-i386.cc | 13 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 7 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 75 | ||||
-rw-r--r-- | gcc/config/mips/driver-native.cc | 25 | ||||
-rw-r--r-- | gcc/config/pru/pru-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/pru/pru.cc | 21 | ||||
-rw-r--r-- | gcc/config/pru/pru.md | 376 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-c.cc | 2 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-vector-builtins.cc | 127 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-vector-builtins.def | 2 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-vector-builtins.h | 45 | ||||
-rw-r--r-- | gcc/config/riscv/riscv.md | 3 | ||||
-rw-r--r-- | gcc/config/vxworks.h | 48 |
22 files changed, 2013 insertions, 804 deletions
diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index db4b56b..e6f52d8 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -2474,6 +2474,20 @@ arc_setup_incoming_varargs (cumulative_args_t args_so_far, } } +/* Return TRUE if reg is ok for short instrcutions. */ + +static bool +arc_check_short_reg_p (rtx op) +{ + if (!REG_P (op)) + return false; + + if (IN_RANGE (REGNO (op) ^ 4, 4, 11)) + return true; + + return false; +} + /* Cost functions. */ /* Provide the costs of an addressing mode that contains ADDR. @@ -2485,7 +2499,7 @@ arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed) switch (GET_CODE (addr)) { case REG : - return speed || satisfies_constraint_Rcq (addr) ? 0 : 1; + return speed || arc_check_short_reg_p (addr) ? 0 : 1; case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC: case PRE_MODIFY: case POST_MODIFY: return !speed; @@ -2517,14 +2531,14 @@ arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed) ? COSTS_N_INSNS (1) : speed ? 0 - : (satisfies_constraint_Rcq (plus0) + : (arc_check_short_reg_p (plus0) && satisfies_constraint_O (plus1)) ? 0 : 1); case REG: return (speed < 1 ? 0 - : (satisfies_constraint_Rcq (plus0) - && satisfies_constraint_Rcq (plus1)) + : (arc_check_short_reg_p (plus0) + && arc_check_short_reg_p (plus1)) ? 
0 : 1); case CONST : case SYMBOL_REF : @@ -3356,7 +3370,7 @@ arc_save_callee_enter (uint64_t gmask, reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx, - off)); + -off)); XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg); RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1; off -= UNITS_PER_WORD; @@ -3370,7 +3384,7 @@ arc_save_callee_enter (uint64_t gmask, reg = gen_rtx_REG (SImode, regno); mem = gen_frame_mem (SImode, plus_constant (Pmode, stack_pointer_rtx, - off)); + -off)); XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg); RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1; gmask = gmask & ~(1ULL << regno); @@ -3380,7 +3394,7 @@ arc_save_callee_enter (uint64_t gmask, { mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx, - off)); + -off)); XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, hard_frame_pointer_rtx); RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1; off -= UNITS_PER_WORD; @@ -9003,8 +9017,8 @@ arc_output_addsi (rtx *operands, bool cond_p, bool output_p) int intval = (REG_P (operands[2]) ? 1 : CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057); int neg_intval = -intval; - int short_0 = satisfies_constraint_Rcq (operands[0]); - int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1])); + int short_0 = arc_check_short_reg_p (operands[0]); + int short_p = (!cond_p && short_0 && arc_check_short_reg_p (operands[1])); int ret = 0; #define REG_H_P(OP) (REG_P (OP) && ((TARGET_V2 && REGNO (OP) <= 31 \ @@ -9037,7 +9051,7 @@ arc_output_addsi (rtx *operands, bool cond_p, bool output_p) patterns. */ if (short_p && ((REG_H_P (operands[2]) - && (match || satisfies_constraint_Rcq (operands[2]))) + && (match || arc_check_short_reg_p (operands[2]))) || (CONST_INT_P (operands[2]) && ((unsigned) intval <= (match ? 127 : 7))))) ADDSI_OUTPUT1 ("add%? 
%0,%1,%2 ;1"); @@ -9064,7 +9078,7 @@ arc_output_addsi (rtx *operands, bool cond_p, bool output_p) /* Generate add_s r0,b,u6; add_s r1,b,u6 patterns. */ if (TARGET_CODE_DENSITY && REG_P (operands[0]) && REG_P (operands[1]) && ((REGNO (operands[0]) == 0) || (REGNO (operands[0]) == 1)) - && satisfies_constraint_Rcq (operands[1]) + && arc_check_short_reg_p (operands[1]) && satisfies_constraint_L (operands[2])) ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;6"); } @@ -10033,7 +10047,7 @@ split_addsi (rtx *operands) /* Try for two short insns first. Lengths being equal, we prefer expansions with shorter register lifetimes. */ if (val > 127 && val <= 255 - && satisfies_constraint_Rcq (operands[0])) + && arc_check_short_reg_p (operands[0])) { operands[3] = operands[2]; operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]); @@ -10057,8 +10071,8 @@ split_subsi (rtx *operands) /* Try for two short insns first. Lengths being equal, we prefer expansions with shorter register lifetimes. */ - if (satisfies_constraint_Rcq (operands[0]) - && satisfies_constraint_Rcq (operands[2])) + if (arc_check_short_reg_p (operands[0]) + && arc_check_short_reg_p (operands[2])) { if (val >= -31 && val <= 127) { @@ -10436,12 +10450,12 @@ arc_lra_p (void) return arc_lra_flag; } -/* ??? Should we define TARGET_REGISTER_PRIORITY? We might perfer to use - Rcq registers, because some insn are shorter with them. OTOH we already - have separate alternatives for this purpose, and other insns don't - mind, so maybe we should rather prefer the other registers? - We need more data, and we can only get that if we allow people to - try all options. */ +/* ??? Should we define TARGET_REGISTER_PRIORITY? We might perfer to + use q registers, because some insn are shorter with them. OTOH we + already have separate alternatives for this purpose, and other + insns don't mind, so maybe we should rather prefer the other + registers? We need more data, and we can only get that if we allow + people to try all options. 
*/ static int arc_register_priority (int r) { diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 7170445..458d3ed 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -696,15 +696,13 @@ archs4x, archs4xd" ; In order to allow the ccfsm machinery to do its work, the leading compact ; alternatives say 'canuse' - there is another alternative that will match ; when the condition codes are used. -; Rcq won't match if the condition is actually used; to avoid a spurious match -; via q, q is inactivated as constraint there. ; Likewise, the length of an alternative that might be shifted to conditional ; execution must reflect this, lest out-of-range branches are created. ; The iscompact attribute allows the epilogue expander to know for which ; insns it should lengthen the return insn. (define_insn "*movqi_insn" - [(set (match_operand:QI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w, w,???w,h, w,Rcq, S,!*x, r,r, Ucm,m,???m, m,Usc") - (match_operand:QI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL, I,?Rac,i,?i, T,Rcq,Usd,Ucm,m,?Rac,c,?Rac,Cm3,i"))] + [(set (match_operand:QI 0 "move_dest_operand" "=q, q,r,q, h, w, w,???w,h, w,q,S,!*x, r,r, Ucm,m,???m, m,Usc") + (match_operand:QI 1 "move_src_operand" "rL,rP,q,P,hCm1,cL, I,?Rac,i,?i,T,q,Usd,Ucm,m,?Rac,c,?Rac,Cm3,i"))] "register_operand (operands[0], QImode) || register_operand (operands[1], QImode) || (satisfies_constraint_Cm3 (operands[1]) @@ -742,8 +740,8 @@ archs4x, archs4xd" "if (prepare_move_operands (operands, HImode)) DONE;") (define_insn "*movhi_insn" - [(set (match_operand:HI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w, w,???w,Rcq#q,h, w,Rcq, S, r,r, Ucm,m,???m, m,VUsc") - (match_operand:HI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL, I,?Rac, i,i,?i, T,Rcq,Ucm,m,?Rac,c,?Rac,Cm3,i"))] + [(set (match_operand:HI 0 "move_dest_operand" "=q, q,r,q, h, w, w,???w,q,h, w,q,S, r,r, Ucm,m,???m, m,VUsc") + (match_operand:HI 1 "move_src_operand" " rL,rP,q,P,hCm1,cL, 
I,?Rac,i,i,?i,T,q,Ucm,m,?Rac,c,?Rac,Cm3,i"))] "register_operand (operands[0], HImode) || register_operand (operands[1], HImode) || (CONSTANT_P (operands[1]) @@ -793,8 +791,8 @@ archs4x, archs4xd" ; the iscompact attribute allows the epilogue expander to know for which ; insns it should lengthen the return insn. (define_insn_and_split "*movsi_insn" ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 - [(set (match_operand:SI 0 "move_dest_operand" "=q, q,r,q, h, rl,r, r, r, r, ?r, r, q, h, rl, q, S, Us<,RcqRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m, m,VUsc") - (match_operand:SI 1 "move_src_operand" "rL,rP,q,P,hCm1,rLl,I,Clo,Chi,Cbi,Cpc,Clb,Cax,Cal,Cal,Uts,Rcq,RcqRck, Us>,Usd,Ucm, Usd, Ucd,m, r,!*Rzd,r,Cm3, C32"))] + [(set (match_operand:SI 0 "move_dest_operand" "=q, q,r,q, h, rl,r, r, r, r, ?r, r, q, h, rl, q, S, Us<,qRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m, m,VUsc") + (match_operand:SI 1 "move_src_operand" "rL,rP,q,P,hCm1,rLl,I,Clo,Chi,Cbi,Cpc,Clb,Cax,Cal,Cal,Uts,q,qRck, Us>,Usd,Ucm, Usd, Ucd,m, r,!*Rzd,r,Cm3, C32"))] "register_operand (operands[0], SImode) || register_operand (operands[1], SImode) || (CONSTANT_P (operands[1]) @@ -998,8 +996,8 @@ archs4x, archs4xd" (match_operand 0 "cc_register" "") (match_operator 4 "zn_compare_operator" [(and:SI - (match_operand:SI 1 "register_operand" "%Rcq,Rcq, c, c, c, c,Rrq,Rrq, c") - (match_operand:SI 2 "nonmemory_operand" "Rcq,C0p,cI,C1p,Ccp,Chs,Cbf,Cbf,???Cal")) + (match_operand:SI 1 "register_operand" "%q, q, c, c, c, c, q, q, c") + (match_operand:SI 2 "nonmemory_operand" "q,C0p,cI,C1p,Ccp,Chs,Cbf,Cbf,???Cal")) (const_int 0)])) (clobber (match_scratch:SI 3 "=X,X,X,X,X,X,Rrq,1,c"))] "TARGET_NPS_BITOPS" @@ -1014,9 +1012,9 @@ archs4x, archs4xd" (match_operator 3 "zn_compare_operator" [(and:SI (match_operand:SI 1 "register_operand" - "%Rcq,Rcq, c, c, c, c, c, c") + "%q, q, c, c, c, c, c, c") (match_operand:SI 2 "nonmemory_operand" - " Rcq,C0p,cI,cL,C1p,Ccp,Chs,Cal")) + " q,C0p,cI,cL,C1p,Ccp,Chs,Cal")) 
(const_int 0)]))] "reload_completed || !satisfies_constraint_Cbf (operands[2]) @@ -1092,9 +1090,9 @@ archs4x, archs4xd" [(set (match_operand:CC_ZN 0 "cc_set_register" "") (match_operator 5 "zn_compare_operator" [(zero_extract:SI - (match_operand:SI 1 "register_operand" "%Rcqq,c, c,Rrq,c") - (match_operand:SI 2 "const_int_operand" "N,N, n,Cbn,n") - (match_operand:SI 3 "const_int_operand" "n,n,C_0,Cbn,n")) + (match_operand:SI 1 "register_operand" "%q,c, c,Rrq,c") + (match_operand:SI 2 "const_int_operand" "N,N, n,Cbn,n") + (match_operand:SI 3 "const_int_operand" "n,n,C_0,Cbn,n")) (const_int 0)])) (clobber (match_scratch:SI 4 "=X,X,X,Rrq,X"))] "" @@ -1678,7 +1676,7 @@ archs4x, archs4xd" "" { if (rtx_equal_p (operands[1], const0_rtx) && GET_CODE (operands[3]) == NE - && satisfies_constraint_Rcq (operands[0])) + && IN_RANGE (REGNO (operands[0]) ^ 4, 4, 11)) return "sub%?.ne %0,%0,%0"; /* ??? might be good for speed on ARC600 too, *if* properly scheduled. */ if ((optimize_size && (!TARGET_ARC600_FAMILY)) @@ -1980,8 +1978,8 @@ archs4x, archs4xd" ;; Absolute instructions (define_insn "abssi2" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,w,w") - (abs:SI (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=q,w,w") + (abs:SI (match_operand:SI 1 "nonmemory_operand" "q,cL,Cal")))] "" "abs%? %0,%1%&" [(set_attr "type" "two_cycle_core") @@ -1991,22 +1989,22 @@ archs4x, archs4xd" ;; Maximum and minimum insns (define_insn "smaxsi3" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w") - (smax:SI (match_operand:SI 1 "register_operand" "%0, c, c") - (match_operand:SI 2 "nonmemory_operand" "cL,cL,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=r, r, r") + (smax:SI (match_operand:SI 1 "register_operand" "%0, r, r") + (match_operand:SI 2 "nonmemory_operand" "rL,rL,Cal")))] "" - "max%? 
%0,%1,%2" + "max%?\\t%0,%1,%2" [(set_attr "type" "two_cycle_core") (set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no")] ) (define_insn "sminsi3" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w") - (smin:SI (match_operand:SI 1 "register_operand" "%0, c, c") - (match_operand:SI 2 "nonmemory_operand" "cL,cL,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=r, r, r") + (smin:SI (match_operand:SI 1 "register_operand" "%0, r, r") + (match_operand:SI 2 "nonmemory_operand" "rL,rL,Cal")))] "" - "min%? %0,%1,%2" + "min%?\\t%0,%1,%2" [(set_attr "type" "two_cycle_core") (set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no")] @@ -2028,10 +2026,10 @@ archs4x, archs4xd" ; We avoid letting this pattern use LP_COUNT as a register by specifying ; register class 'W' instead of 'w'. (define_insn_and_split "*addsi3_mixed" - ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f 10 11 12 - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,Rcq, h,!*Rsd,Rcq,Rcb,Rcq, Rcqq,Rcqq,Rcw,Rcw, Rcw, W, W,W, W,Rcqq,Rcw, W") - (plus:SI (match_operand:SI 1 "register_operand" "%0, c, 0, Rcqq, 0, 0,Rcb, Rcqq, 0, 0, c, 0, c, c,0, 0, 0, 0, c") - (match_operand:SI 2 "nonmemory_operand" "cL, 0, Cm1, L,CL2,Csp,CM4,RcqqK, cO, cL, 0,cCca,cLCmL,Cca,I,C2a, Cal,Cal,Cal")))] + ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f 10 11 12 + [(set (match_operand:SI 0 "dest_reg_operand" "=q,q, h,!*Rsd, q,Rcb, q, q, q, r,r, r, W, W,W, W, q, r, W") + (plus:SI (match_operand:SI 1 "register_operand" "%0,c, 0, q, 0, 0,Rcb, q, 0, 0,r, 0, c, c,0, 0, 0, 0, c") + (match_operand:SI 2 "nonmemory_operand" "cL,0, Cm1, L,CL2,Csp,CM4,qK,cO,rL,0,rCca,cLCmL,Cca,I,C2a,Cal,Cal,Cal")))] "" { arc_output_addsi (operands, arc_ccfsm_cond_exec_p (), true); @@ -2083,9 +2081,9 @@ archs4x, archs4xd" ]) (define_insn "mulhisi3_reg" - [(set (match_operand:SI 0 "register_operand" "=Rcqq,r,r") - (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" " 0,0,r")) - (sign_extend:SI (match_operand:HI 2 "nonmemory_operand" "Rcqq,r,r"))))] + 
[(set (match_operand:SI 0 "register_operand" "=q,r,r") + (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "0,0,r")) + (sign_extend:SI (match_operand:HI 2 "nonmemory_operand" "q,r,r"))))] "TARGET_MPYW" "mpyw%? %0,%1,%2" [(set_attr "length" "*,4,4") @@ -2123,9 +2121,9 @@ archs4x, archs4xd" ]) (define_insn "umulhisi3_reg" - [(set (match_operand:SI 0 "register_operand" "=Rcqq, r, r") - (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" " %0, 0, r")) - (zero_extend:SI (match_operand:HI 2 "register_operand" " Rcqq, r, r"))))] + [(set (match_operand:SI 0 "register_operand" "=q, r, r") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%0, 0, r")) + (zero_extend:SI (match_operand:HI 2 "register_operand" "q, r, r"))))] "TARGET_MPYW" "mpyuw%? %0,%1,%2" [(set_attr "length" "*,4,4") @@ -2246,8 +2244,8 @@ archs4x, archs4xd" (define_insn "mulsi_600" [(set (match_operand:SI 2 "mlo_operand" "") - (mult:SI (match_operand:SI 0 "register_operand" "%Rcq#q,c,c,c") - (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,I,Cal"))) + (mult:SI (match_operand:SI 0 "register_operand" "%q,c,c,c") + (match_operand:SI 1 "nonmemory_operand" "q,cL,I,Cal"))) (clobber (match_operand:SI 3 "mhi_operand" ""))] "TARGET_MUL64_SET" "mul64%?\\t0,%0,%1" @@ -2282,8 +2280,8 @@ archs4x, archs4xd" (define_insn "mul64" [(set (reg:DI MUL64_OUT_REG) (mult:DI - (sign_extend:DI (match_operand:SI 0 "register_operand" "%Rcq#q, c,c, c")) - (sign_extend:DI (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,L,C32"))))] + (sign_extend:DI (match_operand:SI 0 "register_operand" "%q, c,c, c")) + (sign_extend:DI (match_operand:SI 1 "nonmemory_operand" "q,cL,L,C32"))))] "TARGET_MUL64_SET" "mul64%? \t0, %0, %1%&" [(set_attr "length" "*,4,4,8") @@ -2336,11 +2334,11 @@ archs4x, archs4xd" ; registers, since it cannot be the destination of a multi-cycle insn ; like MPY or MPYU. 
(define_insn "mulsi3_700" - [(set (match_operand:SI 0 "mpy_dest_reg_operand" "=Rcr,r,r,Rcr,r") - (mult:SI (match_operand:SI 1 "register_operand" "%0,c,0,0,c") - (match_operand:SI 2 "nonmemory_operand" "cL,cL,I,Cal,Cal")))] + [(set (match_operand:SI 0 "mpy_dest_reg_operand" "=r, r,r, r,r") + (mult:SI (match_operand:SI 1 "register_operand" "%0, r,0, 0,r") + (match_operand:SI 2 "nonmemory_operand" "rL,rL,I,Cal,Cal")))] "TARGET_ARC700_MPY" - "mpyu%? %0,%1,%2" + "mpyu%?\\t%0,%1,%2" [(set_attr "length" "4,4,4,8,8") (set_attr "type" "umulti") (set_attr "predicable" "yes,no,no,yes,no") @@ -2501,15 +2499,15 @@ archs4x, archs4xd" (set_attr "length" "8")]) (define_insn "mulsi3_highpart" - [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r") + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") (truncate:SI (lshiftrt:DI (mult:DI - (sign_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c")) - (sign_extend:DI (match_operand:SI 2 "extend_operand" "c,c, i,i"))) + (sign_extend:DI (match_operand:SI 1 "register_operand" "%0,r,0,r")) + (sign_extend:DI (match_operand:SI 2 "extend_operand" "r,r,i,i"))) (const_int 32))))] "TARGET_MPY" - "mpy%+%? %0,%1,%2" + "mpy%+%?\\t%0,%1,%2" [(set_attr "length" "4,4,8,8") (set_attr "type" "multi") (set_attr "predicable" "yes,no,yes,no") @@ -2518,15 +2516,15 @@ archs4x, archs4xd" ; Note that mpyhu has the same latency as mpy / mpyh, ; thus we use the type multi. (define_insn "*umulsi3_highpart_i" - [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r") + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") (truncate:SI (lshiftrt:DI (mult:DI - (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c")) - (zero_extend:DI (match_operand:SI 2 "extend_operand" "c,c, i,i"))) + (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,r,0,r")) + (zero_extend:DI (match_operand:SI 2 "extend_operand" "r,r,i,i"))) (const_int 32))))] "TARGET_MPY" - "mpy%+u%? 
%0,%1,%2" + "mpy%+u%?\\t%0,%1,%2" [(set_attr "length" "4,4,8,8") (set_attr "type" "multi") (set_attr "predicable" "yes,no,yes,no") @@ -2536,15 +2534,15 @@ archs4x, archs4xd" ;; need a separate pattern for immediates ;; ??? This is fine for combine, but not for reload. (define_insn "umulsi3_highpart_int" - [(set (match_operand:SI 0 "register_operand" "=Rcr, r, r,Rcr, r") + [(set (match_operand:SI 0 "register_operand" "=r, r, r,r, r") (truncate:SI (lshiftrt:DI (mult:DI - (zero_extend:DI (match_operand:SI 1 "register_operand" " 0, c, 0, 0, c")) - (match_operand:DI 2 "immediate_usidi_operand" "L, L, I, Cal, Cal")) + (zero_extend:DI (match_operand:SI 1 "register_operand" " 0, r, 0, 0, r")) + (match_operand:DI 2 "immediate_usidi_operand" "L, L, I,Cal,Cal")) (const_int 32))))] "TARGET_MPY" - "mpy%+u%? %0,%1,%2" + "mpy%+u%?\\t%0,%1,%2" [(set_attr "length" "4,4,4,8,8") (set_attr "type" "multi") (set_attr "predicable" "yes,no,no,yes,no") @@ -2792,13 +2790,13 @@ archs4x, archs4xd" (define_insn "*add_f_2" [(set (reg:CC_C CC_REG) (compare:CC_C - (plus:SI (match_operand:SI 1 "register_operand" "c,0,c") - (match_operand:SI 2 "nonmemory_operand" "cL,I,cCal")) + (plus:SI (match_operand:SI 1 "register_operand" "r ,0,r") + (match_operand:SI 2 "nonmemory_operand" "rL,I,rCal")) (match_dup 2))) - (set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w") + (set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") (plus:SI (match_dup 1) (match_dup 2)))] "" - "add.f %0,%1,%2" + "add.f\\t%0,%1,%2" [(set_attr "cond" "set") (set_attr "type" "compare") (set_attr "length" "4,4,8")]) @@ -2895,22 +2893,22 @@ archs4x, archs4xd" ; the casesi expander might generate a sub of zero, so we have to recognize it. ; combine should make such an insn go away. 
(define_insn_and_split "subsi3_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,Rcw,Rcw,w,w,w, w, w, w") - (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,Rcqq, 0, cL,c,L,I,Cal,Cal, c") - (match_operand:SI 2 "nonmemory_operand" "Rcqq,Rcqq, c, 0,c,c,0, 0, c,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=q,q,r, r,r,r,r, r, r, r") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,q,0,rL,r,L,I,Cal,Cal, r") + (match_operand:SI 2 "nonmemory_operand" "q,q,r, 0,r,r,0, 0, r,Cal")))] "register_operand (operands[1], SImode) || register_operand (operands[2], SImode)" "@ - sub%? %0,%1,%2%& - sub%? %0,%1,%2%& - sub%? %0,%1,%2 - rsub%? %0,%2,%1 - sub %0,%1,%2 - rsub %0,%2,%1 - rsub %0,%2,%1 - rsub%? %0,%2,%1 - rsub %0,%2,%1 - sub %0,%1,%2" + sub%?\\t%0,%1,%2%& + sub%?\\t%0,%1,%2%& + sub%?\\t%0,%1,%2 + rsub%?\\t%0,%2,%1 + sub\\t%0,%1,%2 + rsub\\t%0,%2,%1 + rsub\\t%0,%2,%1 + rsub%?\\t%0,%2,%1 + rsub\\t%0,%2,%1 + sub\\t%0,%1,%2" "reload_completed && get_attr_length (insn) == 8 && satisfies_constraint_I (operands[1]) && GET_CODE (PATTERN (insn)) != COND_EXEC" @@ -2990,19 +2988,19 @@ archs4x, archs4xd" (define_insn "sub_f" [(set (reg:CC CC_REG) - (compare:CC (match_operand:SI 1 "nonmemory_operand" " c,L,0,I,c,Cal") - (match_operand:SI 2 "nonmemory_operand" "cL,c,I,0,Cal,c"))) - (set (match_operand:SI 0 "dest_reg_operand" "=w,w,Rcw,Rcw,w,w") + (compare:CC (match_operand:SI 1 "nonmemory_operand" " r,L,0,I,r,Cal") + (match_operand:SI 2 "nonmemory_operand" "rL,r,I,0,Cal,r"))) + (set (match_operand:SI 0 "dest_reg_operand" "=r,r,r,r,r,r") (minus:SI (match_dup 1) (match_dup 2)))] "register_operand (operands[1], SImode) || register_operand (operands[2], SImode)" "@ - sub.f %0,%1,%2 - rsub.f %0,%2,%1 - sub.f %0,%1,%2 - rsub.f %0,%2,%1 - sub.f %0,%1,%2 - sub.f %0,%1,%2" + sub.f\\t%0,%1,%2 + rsub.f\\t%0,%2,%1 + sub.f\\t%0,%1,%2 + rsub.f\\t%0,%2,%1 + sub.f\\t%0,%1,%2 + sub.f\\t%0,%1,%2" [(set_attr "type" "compare") (set_attr "length" "4,4,4,4,8,8")]) @@ 
-3051,12 +3049,12 @@ archs4x, archs4xd" ;; N.B. sub[123] has the operands of the MINUS in the opposite order from ;; what synth_mult likes. (define_insn "*sub_n" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") - (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,c,?Cal") - (ashift:SI (match_operand:SI 2 "register_operand" "c,c,c") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,r,?Cal") + (ashift:SI (match_operand:SI 2 "register_operand" "r,r,r") (match_operand:SI 3 "_1_2_3_operand" ""))))] "" - "sub%c3%? %0,%1,%2" + "sub%c3%?\\t%0,%1,%2" [(set_attr "type" "shift") (set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") @@ -3064,12 +3062,12 @@ archs4x, archs4xd" (set_attr "iscompact" "false")]) (define_insn "*sub_n" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") - (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,c,?Cal") - (mult:SI (match_operand:SI 2 "register_operand" "c,c,c") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,r,?Cal") + (mult:SI (match_operand:SI 2 "register_operand" "r,r,r") (match_operand:SI 3 "_2_4_8_operand" ""))))] "" - "sub%z3%? %0,%1,%2" + "sub%z3%?\\t%0,%1,%2" [(set_attr "type" "shift") (set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") @@ -3078,12 +3076,12 @@ archs4x, archs4xd" ; ??? check if combine matches this. (define_insn "*bset" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") (ior:SI (ashift:SI (const_int 1) - (match_operand:SI 1 "nonmemory_operand" "cL,cL,c")) - (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + (match_operand:SI 1 "nonmemory_operand" "rL,rL,r")) + (match_operand:SI 2 "nonmemory_operand" "0,r,Cal")))] "" - "bset%? 
%0,%2,%1" + "bset%?\\t%0,%2,%1" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3091,12 +3089,12 @@ archs4x, archs4xd" ; ??? check if combine matches this. (define_insn "*bxor" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") (xor:SI (ashift:SI (const_int 1) - (match_operand:SI 1 "nonmemory_operand" "cL,cL,c")) - (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + (match_operand:SI 1 "nonmemory_operand" "rL,rL,r")) + (match_operand:SI 2 "nonmemory_operand" "0,r,Cal")))] "" - "bxor%? %0,%2,%1" + "bxor%?\\t%0,%2,%1" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3104,12 +3102,12 @@ archs4x, archs4xd" ; ??? check if combine matches this. (define_insn "*bclr" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") (and:SI (not:SI (ashift:SI (const_int 1) - (match_operand:SI 1 "nonmemory_operand" "cL,cL,c"))) - (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + (match_operand:SI 1 "nonmemory_operand" "rL,rL,r"))) + (match_operand:SI 2 "nonmemory_operand" "0,r,Cal")))] "" - "bclr%? %0,%2,%1" + "bclr%?\\t%0,%2,%1" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3121,15 +3119,15 @@ archs4x, archs4xd" ; see also iorsi3 for use with constant bit number. (define_insn "*bset_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") - (ior:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") + (ior:SI (match_operand:SI 1 "nonmemory_operand" "0,r,Cal") (ashift:SI (const_int 1) - (match_operand:SI 2 "nonmemory_operand" "cL,cL,c"))) ) ] + (match_operand:SI 2 "nonmemory_operand" "rL,rL,r"))) ) ] "" "@ - bset%? 
%0,%1,%2 ;;peep2, constr 1 - bset %0,%1,%2 ;;peep2, constr 2 - bset %0,%1,%2 ;;peep2, constr 3" + bset%?\\t%0,%1,%2 ;;peep2, constr 1 + bset\\t%0,%1,%2 ;;peep2, constr 2 + bset\\t%0,%1,%2 ;;peep2, constr 3" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3137,15 +3135,15 @@ archs4x, archs4xd" ; see also xorsi3 for use with constant bit number. (define_insn "*bxor_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") - (xor:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") + (xor:SI (match_operand:SI 1 "nonmemory_operand" "0,r,Cal") (ashift:SI (const_int 1) - (match_operand:SI 2 "nonmemory_operand" "cL,cL,c"))) ) ] + (match_operand:SI 2 "nonmemory_operand" "rL,rL,r"))) ) ] "" "@ - bxor%? %0,%1,%2 - bxor %0,%1,%2 - bxor %0,%1,%2" + bxor%?\\t%0,%1,%2 + bxor\\t%0,%1,%2 + bxor\\t%0,%1,%2" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3153,15 +3151,15 @@ archs4x, archs4xd" ; see also andsi3 for use with constant bit number. (define_insn "*bclr_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") (and:SI (not:SI (ashift:SI (const_int 1) - (match_operand:SI 2 "nonmemory_operand" "cL,rL,r"))) - (match_operand:SI 1 "nonmemory_operand" "0,c,Cal")))] + (match_operand:SI 2 "nonmemory_operand" "rL,rL,r"))) + (match_operand:SI 1 "nonmemory_operand" "0,r,Cal")))] "" "@ - bclr%? %0,%1,%2 - bclr %0,%1,%2 - bclr %0,%1,%2" + bclr%?\\t%0,%1,%2 + bclr\\t%0,%1,%2 + bclr\\t%0,%1,%2" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3169,17 +3167,17 @@ archs4x, archs4xd" ; see also andsi3 for use with constant bit number. 
(define_insn "*bmsk_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") - (and:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") + (and:SI (match_operand:SI 1 "nonmemory_operand" "0,r,Cal") (plus:SI (ashift:SI (const_int 1) (plus:SI (match_operand:SI 2 "nonmemory_operand" "rL,rL,r") (const_int 1))) (const_int -1))))] "" "@ - bmsk%? %0,%1,%2 - bmsk %0,%1,%2 - bmsk %0,%1,%2" + bmsk%?\\t%0,%1,%2 + bmsk\\t%0,%1,%2 + bmsk\\t%0,%1,%2" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3282,18 +3280,18 @@ archs4x, archs4xd" ;;bic define_insn that allows limm to be the first operand (define_insn "*bicsi3_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,Rcw,Rcw,w,w,w") - (and:SI (not:SI (match_operand:SI 1 "nonmemory_operand" "Rcqq,Lc,I,Cal,Lc,Cal,c")) - (match_operand:SI 2 "nonmemory_operand" "0,0,0,0,c,c,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=q,r,r,r,r,r,r") + (and:SI (not:SI (match_operand:SI 1 "nonmemory_operand" "q,Lr,I,Cal,Lr,Cal,r")) + (match_operand:SI 2 "nonmemory_operand" "0,0,0,0,r,r,Cal")))] "" "@ - bic%? %0, %2, %1%& ;;constraint 0 - bic%? %0,%2,%1 ;;constraint 1 - bic %0,%2,%1 ;;constraint 2, FIXME: will it ever get generated ??? - bic%? %0,%2,%1 ;;constraint 3, FIXME: will it ever get generated ??? - bic %0,%2,%1 ;;constraint 4 - bic %0,%2,%1 ;;constraint 5, FIXME: will it ever get generated ??? - bic %0,%2,%1 ;;constraint 6" + bic%?\\t%0, %2, %1%& ;;constraint 0 + bic%?\\t%0,%2,%1 ;;constraint 1 + bic\\t%0,%2,%1 ;;constraint 2, FIXME: will it ever get generated ??? + bic%?\\t%0,%2,%1 ;;constraint 3, FIXME: will it ever get generated ??? + bic\\t%0,%2,%1 ;;constraint 4 + bic\\t%0,%2,%1 ;;constraint 5, FIXME: will it ever get generated ??? 
+ bic\\t%0,%2,%1 ;;constraint 6" [(set_attr "length" "*,4,4,8,4,8,8") (set_attr "iscompact" "maybe, false, false, false, false, false, false") (set_attr "predicable" "no,yes,no,yes,no,no,no") @@ -3334,19 +3332,19 @@ archs4x, archs4xd" (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,nocond,canuse,nocond")]) (define_insn "xorsi3" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcw,Rcw,Rcw,Rcw, w, w,w, w, w") - (xor:SI (match_operand:SI 1 "register_operand" "%0, Rcq, 0, c, 0, 0, c, c,0, 0, c") - (match_operand:SI 2 "nonmemory_operand" " Rcqq, 0, cL, 0,C0p, I,cL,C0p,I,Cal,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=q,q, r,r, r,r, r, r,r, r, r") + (xor:SI (match_operand:SI 1 "register_operand" "%0,q, 0,r, 0,0, r, r,0, 0, r") + (match_operand:SI 2 "nonmemory_operand" "q,0,rL,0,C0p,I,rL,C0p,I,Cal,Cal")))] "" "* switch (which_alternative) { case 0: case 2: case 5: case 6: case 8: case 9: case 10: - return \"xor%? %0,%1,%2%&\"; + return \"xor%?\\t%0,%1,%2%&\"; case 1: case 3: - return \"xor%? %0,%2,%1%&\"; + return \"xor%?\\t%0,%2,%1%&\"; case 4: case 7: - return \"bxor%? %0,%1,%z2\"; + return \"bxor%?\\t%0,%1,%z2\"; default: gcc_unreachable (); } @@ -3358,17 +3356,17 @@ archs4x, archs4xd" (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,canuse,nocond")]) (define_insn "negsi2" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,Rcw,w") - (neg:SI (match_operand:SI 1 "register_operand" "0,Rcqq,0,c")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=q,q,r,r") + (neg:SI (match_operand:SI 1 "register_operand" "0,q,0,r")))] "" - "neg%? 
%0,%1%&" + "neg%?\\t%0,%1%&" [(set_attr "type" "unary") (set_attr "iscompact" "maybe,true,false,false") (set_attr "predicable" "no,no,yes,no")]) (define_insn "one_cmplsi2" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w") - (not:SI (match_operand:SI 1 "register_operand" "Rcqq,c")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=q,w") + (not:SI (match_operand:SI 1 "register_operand" "q,c")))] "" "not%? %0,%1%&" [(set_attr "type" "unary,unary") @@ -3498,14 +3496,14 @@ archs4x, archs4xd" (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")]) (define_insn "*lshrsi3_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w") - (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal") - (match_operand:SI 2 "nonmemory_operand" "N, N,RcqqM, cL,cL,cCal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=q,q, q, r, r, r") + (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,q, 0, 0, r,rCal") + (match_operand:SI 2 "nonmemory_operand" "N,N,qM,rL,rL,rCal")))] "TARGET_BARREL_SHIFTER && (register_operand (operands[1], SImode) || register_operand (operands[2], SImode))" "*return (which_alternative <= 1 && !arc_ccfsm_cond_exec_p () - ? \"lsr%? %0,%1%&\" : \"lsr%? %0,%1,%2%&\");" + ? \"lsr%?\\t%0,%1%&\" : \"lsr%?\\t%0,%1,%2%&\");" [(set_attr "type" "shift") (set_attr "iscompact" "maybe,maybe,maybe,false,false,false") (set_attr "predicable" "no,no,no,yes,no,no") @@ -3546,8 +3544,8 @@ archs4x, archs4xd" ;; modifed cc user if second, but not first operand is a compact register. (define_insn "cmpsi_cc_insn_mixed" [(set (reg:CC CC_REG) - (compare:CC (match_operand:SI 0 "register_operand" "Rcq#q,Rcqq, h, c, c,qRcq,c") - (match_operand:SI 1 "nonmemory_operand" "cO, hO,Cm1,cI,cL, Cal,Cal")))] + (compare:CC (match_operand:SI 0 "register_operand" "q, q, h, c, c, q,c") + (match_operand:SI 1 "nonmemory_operand" "cO,hO,Cm1,cI,cL,Cal,Cal")))] "" "cmp%? 
%0,%B1%&" [(set_attr "type" "compare") @@ -3559,7 +3557,7 @@ archs4x, archs4xd" (define_insn "*cmpsi_cc_zn_insn" [(set (reg:CC_ZN CC_REG) - (compare:CC_ZN (match_operand:SI 0 "register_operand" "qRcq,c") + (compare:CC_ZN (match_operand:SI 0 "register_operand" "q,c") (const_int 0)))] "" "tst%? %0,%0%&" @@ -3573,7 +3571,7 @@ archs4x, archs4xd" (define_insn "*btst" [(set (reg:CC_ZN CC_REG) (compare:CC_ZN - (zero_extract:SI (match_operand:SI 0 "register_operand" "Rcqq,c") + (zero_extract:SI (match_operand:SI 0 "register_operand" "q,c") (const_int 1) (match_operand:SI 1 "nonmemory_operand" "L,Lc")) (const_int 0)))] @@ -3618,7 +3616,7 @@ archs4x, archs4xd" (define_insn "*cmpsi_cc_z_insn" [(set (reg:CC_Z CC_REG) - (compare:CC_Z (match_operand:SI 0 "register_operand" "qRcq,c") + (compare:CC_Z (match_operand:SI 0 "register_operand" "q,c") (match_operand:SI 1 "p2_immediate_operand" "O,n")))] "" "@ @@ -3631,8 +3629,8 @@ archs4x, archs4xd" (define_insn "*cmpsi_cc_c_insn" [(set (reg:CC_C CC_REG) - (compare:CC_C (match_operand:SI 0 "register_operand" "Rcqq,Rcqq, h, c,Rcqq, c") - (match_operand:SI 1 "nonmemory_operand" "cO, hO,Cm1,cI, Cal,Cal")))] + (compare:CC_C (match_operand:SI 0 "register_operand" "q, q, h, c, q, c") + (match_operand:SI 1 "nonmemory_operand" "cO,hO,Cm1,cI,Cal,Cal")))] "" "cmp%? %0,%1%&" [(set_attr "type" "compare") @@ -3944,7 +3942,7 @@ archs4x, archs4xd" (const_int 2)))]) (define_insn "indirect_jump" - [(set (pc) (match_operand:SI 0 "nonmemory_operand" "L,I,Cal,Rcqq,r"))] + [(set (pc) (match_operand:SI 0 "nonmemory_operand" "L,I,Cal,q,r"))] "" "@ j%!%* %0%& @@ -4076,7 +4074,7 @@ archs4x, archs4xd" ; Unlike the canonical tablejump, this pattern always uses a jump address, ; even for CASE_VECTOR_PC_RELATIVE. 
(define_insn "casesi_jump" - [(set (pc) (match_operand:SI 0 "register_operand" "Cal,Rcqq,c")) + [(set (pc) (match_operand:SI 0 "register_operand" "Cal,q,c")) (use (label_ref (match_operand 1 "" "")))] "" "j%!%* [%0]%&" @@ -4106,18 +4104,16 @@ archs4x, archs4xd" } ") -; Rcq, which is used in alternative 0, checks for conditional execution. ; At instruction output time, if it doesn't match and we end up with ; alternative 1 ("q"), that means that we can't use the short form. (define_insn "*call_i" [(call (mem:SI (match_operand:SI 0 - "call_address_operand" "Rcq,q,c,Cji,Csc,Cbp,Cbr,L,I,Cal")) + "call_address_operand" "q,c,Cji,Csc,Cbp,Cbr,L,I,Cal")) (match_operand 1 "" "")) (clobber (reg:SI 31))] "" "@ jl%!%* [%0]%& - jl%!%* [%0]%& jl%!%* [%0] jli_s %S0 sjli %S0 @@ -4126,10 +4122,10 @@ archs4x, archs4xd" jl%!%* %0 jl%* %0 jl%! %0" - [(set_attr "type" "call,call,call,call_no_delay_slot,call_no_delay_slot,call,call,call,call,call_no_delay_slot") - (set_attr "iscompact" "maybe,false,*,true,*,*,*,*,*,*") - (set_attr "predicable" "no,no,yes,no,no,yes,no,yes,no,yes") - (set_attr "length" "*,*,4,2,4,4,4,4,4,8")]) + [(set_attr "type" "call,call,call_no_delay_slot,call_no_delay_slot,call,call,call,call,call_no_delay_slot") + (set_attr "iscompact" "maybe,*,true,*,*,*,*,*,*") + (set_attr "predicable" "no,yes,no,no,yes,no,yes,no,yes") + (set_attr "length" "*,4,2,4,4,4,4,4,8")]) (define_expand "call_value" ;; operand 2 is stack_size_rtx @@ -4151,19 +4147,17 @@ archs4x, archs4xd" XEXP (operands[1], 0) = force_reg (Pmode, callee); }") -; Rcq, which is used in alternative 0, checks for conditional execution. ; At instruction output time, if it doesn't match and we end up with ; alternative 1 ("q"), that means that we can't use the short form. 
(define_insn "*call_value_i" - [(set (match_operand 0 "dest_reg_operand" "=Rcq,q,w, w, w, w, w,w,w, w") + [(set (match_operand 0 "dest_reg_operand" "=q,w, w, w, w, w,w,w, w") (call (mem:SI (match_operand:SI 1 - "call_address_operand" "Rcq,q,c,Cji,Csc,Cbp,Cbr,L,I,Cal")) + "call_address_operand" "q,c,Cji,Csc,Cbp,Cbr,L,I,Cal")) (match_operand 2 "" ""))) (clobber (reg:SI 31))] "" "@ jl%!%* [%1]%& - jl%!%* [%1]%& jl%!%* [%1] jli_s %S1 sjli %S1 @@ -4172,10 +4166,10 @@ archs4x, archs4xd" jl%!%* %1 jl%* %1 jl%! %1" - [(set_attr "type" "call,call,call,call_no_delay_slot,call_no_delay_slot,call,call,call,call,call_no_delay_slot") - (set_attr "iscompact" "maybe,false,*,true,false,*,*,*,*,*") - (set_attr "predicable" "no,no,yes,no,no,yes,no,yes,no,yes") - (set_attr "length" "*,*,4,2,4,4,4,4,4,8")]) + [(set_attr "type" "call,call,call_no_delay_slot,call_no_delay_slot,call,call,call,call,call_no_delay_slot") + (set_attr "iscompact" "maybe,*,true,false,*,*,*,*,*") + (set_attr "predicable" "no,yes,no,no,yes,no,yes,no,yes") + (set_attr "length" "*,4,2,4,4,4,4,4,8")]) ; There is a bl_s instruction (16 bit opcode branch-and-link), but we can't ; use it for lack of inter-procedural branch shortening. @@ -4943,7 +4937,7 @@ archs4x, archs4xd" [(set (pc) (if_then_else (match_operator 3 "equality_comparison_operator" - [(zero_extract:SI (match_operand:SI 1 "register_operand" "Rcqq,c") + [(zero_extract:SI (match_operand:SI 1 "register_operand" "q,c") (const_int 1) (match_operand:SI 2 "nonmemory_operand" "L,Lc")) (const_int 0)]) @@ -5153,20 +5147,20 @@ archs4x, archs4xd" (set_attr "predicable" "yes")]) (define_insn "abssf2" - [(set (match_operand:SF 0 "dest_reg_operand" "=Rcq#q,Rcw,w") - (abs:SF (match_operand:SF 1 "register_operand" "0, 0,c")))] + [(set (match_operand:SF 0 "dest_reg_operand" "=q,r,r") + (abs:SF (match_operand:SF 1 "register_operand" "0,0,r")))] "" - "bclr%? 
%0,%1,31%&" + "bclr%?\\t%0,%1,31%&" [(set_attr "type" "unary") (set_attr "iscompact" "maybe,false,false") (set_attr "length" "2,4,4") (set_attr "predicable" "no,yes,no")]) (define_insn "negsf2" - [(set (match_operand:SF 0 "dest_reg_operand" "=Rcw,w") - (neg:SF (match_operand:SF 1 "register_operand" "0,c")))] + [(set (match_operand:SF 0 "dest_reg_operand" "=r,r") + (neg:SF (match_operand:SF 1 "register_operand" "0,r")))] "" - "bxor%? %0,%1,31" + "bxor%?\\t%0,%1,31" [(set_attr "type" "unary") (set_attr "predicable" "yes,no")]) @@ -5966,8 +5960,8 @@ archs4x, archs4xd" (set_attr "length" "4")]) (define_insn "*ashlsi2_cnt1" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w") - (ashift:SI (match_operand:SI 1 "register_operand" "Rcqq,c") + [(set (match_operand:SI 0 "dest_reg_operand" "=q,w") + (ashift:SI (match_operand:SI 1 "register_operand" "q,c") (const_int 1)))] "" "asl%? %0,%1%&" @@ -5999,8 +5993,8 @@ archs4x, archs4xd" (set_attr "predicable" "no")]) (define_insn "*lshrsi3_cnt1" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w") - (lshiftrt:SI (match_operand:SI 1 "register_operand" "Rcqq,c") + [(set (match_operand:SI 0 "dest_reg_operand" "=q,w") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "q,c") (const_int 1)))] "" "lsr%? %0,%1%&" @@ -6009,8 +6003,8 @@ archs4x, archs4xd" (set_attr "predicable" "no,no")]) (define_insn "*ashrsi3_cnt1" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w") - (ashiftrt:SI (match_operand:SI 1 "register_operand" "Rcqq,c") + [(set (match_operand:SI 0 "dest_reg_operand" "=q,w") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "q,c") (const_int 1)))] "" "asr%? 
%0,%1%&" @@ -6141,7 +6135,7 @@ archs4x, archs4xd" (set_attr "length" "36")]) (define_insn "macd" - [(set (match_operand:DI 0 "even_register_operand" "=Rcr,r,r") + [(set (match_operand:DI 0 "even_register_operand" "=r,r,r") (plus:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "%0,r,r")) @@ -6243,7 +6237,7 @@ archs4x, archs4xd" (set_attr "length" "36")]) (define_insn "macdu" - [(set (match_operand:DI 0 "even_register_operand" "=Rcr,r,r") + [(set (match_operand:DI 0 "even_register_operand" "=r,r,r") (plus:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,r,r")) diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt index 0add5a2..b5827325 100644 --- a/gcc/config/arc/arc.opt +++ b/gcc/config/arc/arc.opt @@ -308,12 +308,14 @@ Target Ignore Does nothing. Preserved for backward compatibility. mRcq -Target Var(TARGET_Rcq) -Enable Rcq constraint handling - most short code generation depends on this. +Target Ignore +Does nothing. Preserved for backward compatibility. + mRcw -Target Var(TARGET_Rcw) -Enable Rcw constraint handling - ccfsm condexec mostly depends on this. +Target Ignore +Does nothing. Preserved for backward compatibility. + mearly-cbranchsi Target Var(TARGET_EARLY_CBRANCHSI) diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md index 02aa37f..38bda12 100644 --- a/gcc/config/arc/constraints.md +++ b/gcc/config/arc/constraints.md @@ -432,50 +432,6 @@ && !arc_legitimate_pic_addr_p (op) && !(satisfies_constraint_I (op) && optimize_size)")) -; Note that the 'cryptic' register constraints will not make reload use the -; associated class to reload into, but this will not penalize reloading of any -; other operands, or using an alternate part of the same alternative. - -; Rcq is different in three important ways from a register class constraint: -; - It does not imply a register class, hence reload will not use it to drive -; reloads. 
-; - It matches even when there is no register class to describe its accepted -; set; not having such a set again lessens the impact on register allocation. -; - It won't match when the instruction is conditionalized by the ccfsm. -(define_constraint "Rcq" - "@internal - Cryptic q - for short insn generation while not affecting register allocation - Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3}, - @code{r12}-@code{r15}" - (and (match_code "reg") - (match_test "TARGET_Rcq - && !arc_ccfsm_cond_exec_p () - && IN_RANGE (REGNO (op) ^ 4, 4, 11)"))) - -; If we need a reload, we generally want to steer reload to use three-address -; alternatives in preference of two-address alternatives, unless the -; three-address alternative introduces a LIMM that is unnecessary for the -; two-address alternative. -(define_constraint "Rcw" - "@internal - Cryptic w - for use in early alternatives with matching constraint" - (and (match_code "reg") - (match_test - "TARGET_Rcw - && REGNO (op) < FIRST_PSEUDO_REGISTER - && TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], - REGNO (op))"))) - -(define_constraint "Rcr" - "@internal - Cryptic r - for use in early alternatives with matching constraint" - (and (match_code "reg") - (match_test - "TARGET_Rcw - && REGNO (op) < FIRST_PSEUDO_REGISTER - && TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], - REGNO (op))"))) - (define_constraint "Rcb" "@internal Stack Pointer register @code{r28} - do not reload into its class" diff --git a/gcc/config/gcn/gcn-modes.def b/gcc/config/gcn/gcn-modes.def index 82585de..1b8a320 100644 --- a/gcc/config/gcn/gcn-modes.def +++ b/gcc/config/gcn/gcn-modes.def @@ -29,6 +29,48 @@ VECTOR_MODE (FLOAT, HF, 64); /* V64HF */ VECTOR_MODE (FLOAT, SF, 64); /* V64SF */ VECTOR_MODE (FLOAT, DF, 64); /* V64DF */ +/* Artificial vector modes, for when vector masking doesn't work (yet). 
*/ +VECTOR_MODE (INT, QI, 32); /* V32QI */ +VECTOR_MODE (INT, HI, 32); /* V32HI */ +VECTOR_MODE (INT, SI, 32); /* V32SI */ +VECTOR_MODE (INT, DI, 32); /* V32DI */ +VECTOR_MODE (INT, TI, 32); /* V32TI */ +VECTOR_MODE (FLOAT, HF, 32); /* V32HF */ +VECTOR_MODE (FLOAT, SF, 32); /* V32SF */ +VECTOR_MODE (FLOAT, DF, 32); /* V32DF */ +VECTOR_MODE (INT, QI, 16); /* V16QI */ +VECTOR_MODE (INT, HI, 16); /* V16HI */ +VECTOR_MODE (INT, SI, 16); /* V16SI */ +VECTOR_MODE (INT, DI, 16); /* V16DI */ +VECTOR_MODE (INT, TI, 16); /* V16TI */ +VECTOR_MODE (FLOAT, HF, 16); /* V16HF */ +VECTOR_MODE (FLOAT, SF, 16); /* V16SF */ +VECTOR_MODE (FLOAT, DF, 16); /* V16DF */ +VECTOR_MODE (INT, QI, 8); /* V8QI */ +VECTOR_MODE (INT, HI, 8); /* V8HI */ +VECTOR_MODE (INT, SI, 8); /* V8SI */ +VECTOR_MODE (INT, DI, 8); /* V8DI */ +VECTOR_MODE (INT, TI, 8); /* V8TI */ +VECTOR_MODE (FLOAT, HF, 8); /* V8HF */ +VECTOR_MODE (FLOAT, SF, 8); /* V8SF */ +VECTOR_MODE (FLOAT, DF, 8); /* V8DF */ +VECTOR_MODE (INT, QI, 4); /* V4QI */ +VECTOR_MODE (INT, HI, 4); /* V4HI */ +VECTOR_MODE (INT, SI, 4); /* V4SI */ +VECTOR_MODE (INT, DI, 4); /* V4DI */ +VECTOR_MODE (INT, TI, 4); /* V4TI */ +VECTOR_MODE (FLOAT, HF, 4); /* V4HF */ +VECTOR_MODE (FLOAT, SF, 4); /* V4SF */ +VECTOR_MODE (FLOAT, DF, 4); /* V4DF */ +VECTOR_MODE (INT, QI, 2); /* V2QI */ +VECTOR_MODE (INT, HI, 2); /* V2HI */ +VECTOR_MODE (INT, SI, 2); /* V2SI */ +VECTOR_MODE (INT, DI, 2); /* V2DI */ +VECTOR_MODE (INT, TI, 2); /* V2TI */ +VECTOR_MODE (FLOAT, HF, 2); /* V2HF */ +VECTOR_MODE (FLOAT, SF, 2); /* V2SF */ +VECTOR_MODE (FLOAT, DF, 2); /* V2DF */ + /* Vector units handle reads independently and thus no large alignment needed. 
*/ ADJUST_ALIGNMENT (V64QI, 1); @@ -39,3 +81,43 @@ ADJUST_ALIGNMENT (V64TI, 16); ADJUST_ALIGNMENT (V64HF, 2); ADJUST_ALIGNMENT (V64SF, 4); ADJUST_ALIGNMENT (V64DF, 8); +ADJUST_ALIGNMENT (V32QI, 1); +ADJUST_ALIGNMENT (V32HI, 2); +ADJUST_ALIGNMENT (V32SI, 4); +ADJUST_ALIGNMENT (V32DI, 8); +ADJUST_ALIGNMENT (V32TI, 16); +ADJUST_ALIGNMENT (V32HF, 2); +ADJUST_ALIGNMENT (V32SF, 4); +ADJUST_ALIGNMENT (V32DF, 8); +ADJUST_ALIGNMENT (V16QI, 1); +ADJUST_ALIGNMENT (V16HI, 2); +ADJUST_ALIGNMENT (V16SI, 4); +ADJUST_ALIGNMENT (V16DI, 8); +ADJUST_ALIGNMENT (V16TI, 16); +ADJUST_ALIGNMENT (V16HF, 2); +ADJUST_ALIGNMENT (V16SF, 4); +ADJUST_ALIGNMENT (V16DF, 8); +ADJUST_ALIGNMENT (V8QI, 1); +ADJUST_ALIGNMENT (V8HI, 2); +ADJUST_ALIGNMENT (V8SI, 4); +ADJUST_ALIGNMENT (V8DI, 8); +ADJUST_ALIGNMENT (V8TI, 16); +ADJUST_ALIGNMENT (V8HF, 2); +ADJUST_ALIGNMENT (V8SF, 4); +ADJUST_ALIGNMENT (V8DF, 8); +ADJUST_ALIGNMENT (V4QI, 1); +ADJUST_ALIGNMENT (V4HI, 2); +ADJUST_ALIGNMENT (V4SI, 4); +ADJUST_ALIGNMENT (V4DI, 8); +ADJUST_ALIGNMENT (V4TI, 16); +ADJUST_ALIGNMENT (V4HF, 2); +ADJUST_ALIGNMENT (V4SF, 4); +ADJUST_ALIGNMENT (V4DF, 8); +ADJUST_ALIGNMENT (V2QI, 1); +ADJUST_ALIGNMENT (V2HI, 2); +ADJUST_ALIGNMENT (V2SI, 4); +ADJUST_ALIGNMENT (V2DI, 8); +ADJUST_ALIGNMENT (V2TI, 16); +ADJUST_ALIGNMENT (V2HF, 2); +ADJUST_ALIGNMENT (V2SF, 4); +ADJUST_ALIGNMENT (V2DF, 8); diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h index ca80460..f9a1fc0 100644 --- a/gcc/config/gcn/gcn-protos.h +++ b/gcc/config/gcn/gcn-protos.h @@ -24,6 +24,8 @@ extern bool gcn_constant64_p (rtx); extern bool gcn_constant_p (rtx); extern rtx gcn_convert_mask_mode (rtx reg); extern unsigned int gcn_dwarf_register_number (unsigned int regno); +extern rtx get_exec (int64_t); +extern rtx get_exec (machine_mode mode); extern char * gcn_expand_dpp_shr_insn (machine_mode, const char *, int, int); extern void gcn_expand_epilogue (); extern rtx gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, @@ -34,8 +36,6 
@@ extern rtx gcn_expand_scalar_to_vector_address (machine_mode, rtx, rtx, rtx); extern void gcn_expand_vector_init (rtx, rtx); extern bool gcn_flat_address_p (rtx, machine_mode); extern bool gcn_fp_constant_p (rtx, bool); -extern rtx gcn_full_exec (); -extern rtx gcn_full_exec_reg (); extern rtx gcn_gen_undef (machine_mode); extern bool gcn_global_address_p (rtx); extern tree gcn_goacc_adjust_private_decl (location_t, tree var, int level); @@ -67,8 +67,6 @@ extern rtx gcn_operand_part (machine_mode, rtx, int); extern bool gcn_regno_mode_code_ok_for_base_p (int, machine_mode, addr_space_t, int, int); extern reg_class gcn_regno_reg_class (int regno); -extern rtx gcn_scalar_exec (); -extern rtx gcn_scalar_exec_reg (); extern bool gcn_scalar_flat_address_p (rtx); extern bool gcn_scalar_flat_mem_p (rtx); extern bool gcn_sgpr_move_p (rtx, rtx); @@ -105,9 +103,11 @@ extern gimple_opt_pass *make_pass_omp_gcn (gcc::context *ctxt); inline bool vgpr_1reg_mode_p (machine_mode mode) { - return (mode == SImode || mode == SFmode || mode == HImode || mode == QImode - || mode == V64QImode || mode == V64HImode || mode == V64SImode - || mode == V64HFmode || mode == V64SFmode || mode == BImode); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + return (mode == SImode || mode == SFmode || mode == HImode || mode == HFmode + || mode == QImode || mode == BImode); } /* Return true if MODE is valid for 1 SGPR register. */ @@ -124,8 +124,10 @@ sgpr_1reg_mode_p (machine_mode mode) inline bool vgpr_2reg_mode_p (machine_mode mode) { - return (mode == DImode || mode == DFmode - || mode == V64DImode || mode == V64DFmode); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + return (mode == DImode || mode == DFmode); } /* Return true if MODE can be handled directly by VGPR operations. 
*/ @@ -133,9 +135,7 @@ vgpr_2reg_mode_p (machine_mode mode) inline bool vgpr_vector_mode_p (machine_mode mode) { - return (mode == V64QImode || mode == V64HImode - || mode == V64SImode || mode == V64DImode - || mode == V64HFmode || mode == V64SFmode || mode == V64DFmode); + return VECTOR_MODE_P (mode); } diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index dec81e8..00c0e3b 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -17,88 +17,243 @@ ;; {{{ Vector iterators ; Vector modes for specific types -; (This will make more sense when there are multiple vector sizes) (define_mode_iterator V_QI - [V64QI]) + [V2QI V4QI V8QI V16QI V32QI V64QI]) (define_mode_iterator V_HI - [V64HI]) + [V2HI V4HI V8HI V16HI V32HI V64HI]) (define_mode_iterator V_HF - [V64HF]) + [V2HF V4HF V8HF V16HF V32HF V64HF]) (define_mode_iterator V_SI - [V64SI]) + [V2SI V4SI V8SI V16SI V32SI V64SI]) (define_mode_iterator V_SF - [V64SF]) + [V2SF V4SF V8SF V16SF V32SF V64SF]) (define_mode_iterator V_DI - [V64DI]) + [V2DI V4DI V8DI V16DI V32DI V64DI]) (define_mode_iterator V_DF - [V64DF]) + [V2DF V4DF V8DF V16DF V32DF V64DF]) + +(define_mode_iterator V64_SI + [V64SI]) +(define_mode_iterator V64_DI + [V64DI]) ; Vector modes for sub-dword modes (define_mode_iterator V_QIHI - [V64QI V64HI]) + [V2QI V2HI + V4QI V4HI + V8QI V8HI + V16QI V16HI + V32QI V32HI + V64QI V64HI]) ; Vector modes for one vector register (define_mode_iterator V_1REG - [V64QI V64HI V64SI V64HF V64SF]) + [V2QI V2HI V2SI V2HF V2SF + V4QI V4HI V4SI V4HF V4SF + V8QI V8HI V8SI V8HF V8SF + V16QI V16HI V16SI V16HF V16SF + V32QI V32HI V32SI V32HF V32SF + V64QI V64HI V64SI V64HF V64SF]) (define_mode_iterator V_INT_1REG - [V64QI V64HI V64SI]) + [V2QI V2HI V2SI + V4QI V4HI V4SI + V8QI V8HI V8SI + V16QI V16HI V16SI + V32QI V32HI V32SI + V64QI V64HI V64SI]) (define_mode_iterator V_INT_1REG_ALT - [V64QI V64HI V64SI]) + [V2QI V2HI V2SI + V4QI V4HI V4SI + V8QI V8HI V8SI + V16QI V16HI V16SI + V32QI V32HI 
V32SI + V64QI V64HI V64SI]) (define_mode_iterator V_FP_1REG - [V64HF V64SF]) + [V2HF V2SF + V4HF V4SF + V8HF V8SF + V16HF V16SF + V32HF V32SF + V64HF V64SF]) + +; V64_* modes are for where more general support is unimplemented +; (e.g. reductions) +(define_mode_iterator V64_1REG + [V64QI V64HI V64SI V64HF V64SF]) +(define_mode_iterator V64_INT_1REG + [V64QI V64HI V64SI]) ; Vector modes for two vector registers (define_mode_iterator V_2REG + [V2DI V2DF + V4DI V4DF + V8DI V8DF + V16DI V16DF + V32DI V32DF + V64DI V64DF]) + +(define_mode_iterator V64_2REG [V64DI V64DF]) ; Vector modes with native support (define_mode_iterator V_noQI - [V64HI V64HF V64SI V64SF V64DI V64DF]) + [V2HI V2HF V2SI V2SF V2DI V2DF + V4HI V4HF V4SI V4SF V4DI V4DF + V8HI V8HF V8SI V8SF V8DI V8DF + V16HI V16HF V16SI V16SF V16DI V16DF + V32HI V32HF V32SI V32SF V32DI V32DF + V64HI V64HF V64SI V64SF V64DI V64DF]) (define_mode_iterator V_noHI - [V64HF V64SI V64SF V64DI V64DF]) + [V2HF V2SI V2SF V2DI V2DF + V4HF V4SI V4SF V4DI V4DF + V8HF V8SI V8SF V8DI V8DF + V16HF V16SI V16SF V16DI V16DF + V32HF V32SI V32SF V32DI V32DF + V64HF V64SI V64SF V64DI V64DF]) (define_mode_iterator V_INT_noQI - [V64HI V64SI V64DI]) + [V2HI V2SI V2DI + V4HI V4SI V4DI + V8HI V8SI V8DI + V16HI V16SI V16DI + V32HI V32SI V32DI + V64HI V64SI V64DI]) (define_mode_iterator V_INT_noHI - [V64SI V64DI]) + [V2SI V2DI + V4SI V4DI + V8SI V8DI + V16SI V16DI + V32SI V32DI + V64SI V64DI]) ; All of above (define_mode_iterator V_ALL - [V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) + [V2QI V2HI V2HF V2SI V2SF V2DI V2DF + V4QI V4HI V4HF V4SI V4SF V4DI V4DF + V8QI V8HI V8HF V8SI V8SF V8DI V8DF + V16QI V16HI V16HF V16SI V16SF V16DI V16DF + V32QI V32HI V32HF V32SI V32SF V32DI V32DF + V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) (define_mode_iterator V_ALL_ALT - [V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) + [V2QI V2HI V2HF V2SI V2SF V2DI V2DF + V4QI V4HI V4HF V4SI V4SF V4DI V4DF + V8QI V8HI V8HF V8SI V8SF V8DI V8DF + V16QI V16HI V16HF V16SI V16SF 
V16DI V16DF + V32QI V32HI V32HF V32SI V32SF V32DI V32DF + V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) (define_mode_iterator V_INT - [V64QI V64HI V64SI V64DI]) + [V2QI V2HI V2SI V2DI + V4QI V4HI V4SI V4DI + V8QI V8HI V8SI V8DI + V16QI V16HI V16SI V16DI + V32QI V32HI V32SI V32DI + V64QI V64HI V64SI V64DI]) (define_mode_iterator V_FP + [V2HF V2SF V2DF + V4HF V4SF V4DF + V8HF V8SF V8DF + V16HF V16SF V16DF + V32HF V32SF V32DF + V64HF V64SF V64DF]) + +(define_mode_iterator V64_ALL + [V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) +(define_mode_iterator V64_FP [V64HF V64SF V64DF]) (define_mode_attr scalar_mode - [(V64QI "qi") (V64HI "hi") (V64SI "si") + [(V2QI "qi") (V2HI "hi") (V2SI "si") + (V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df") + (V4QI "qi") (V4HI "hi") (V4SI "si") + (V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df") + (V8QI "qi") (V8HI "hi") (V8SI "si") + (V8HF "hf") (V8SF "sf") (V8DI "di") (V8DF "df") + (V16QI "qi") (V16HI "hi") (V16SI "si") + (V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df") + (V32QI "qi") (V32HI "hi") (V32SI "si") + (V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df") + (V64QI "qi") (V64HI "hi") (V64SI "si") (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")]) (define_mode_attr SCALAR_MODE - [(V64QI "QI") (V64HI "HI") (V64SI "SI") + [(V2QI "QI") (V2HI "HI") (V2SI "SI") + (V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF") + (V4QI "QI") (V4HI "HI") (V4SI "SI") + (V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF") + (V8QI "QI") (V8HI "HI") (V8SI "SI") + (V8HF "HF") (V8SF "SF") (V8DI "DI") (V8DF "DF") + (V16QI "QI") (V16HI "HI") (V16SI "SI") + (V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF") + (V32QI "QI") (V32HI "HI") (V32SI "SI") + (V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF") + (V64QI "QI") (V64HI "HI") (V64SI "SI") (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")]) (define_mode_attr vnsi - [(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si") + [(V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si") + (V2SF "v2si") (V2DI 
"v2si") (V2DF "v2si") + (V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si") + (V4SF "v4si") (V4DI "v4si") (V4DF "v4si") + (V8QI "v8si") (V8HI "v8si") (V8HF "v8si") (V8SI "v8si") + (V8SF "v8si") (V8DI "v8si") (V8DF "v8si") + (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si") + (V16SF "v16si") (V16DI "v16si") (V16DF "v16si") + (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si") + (V32SF "v32si") (V32DI "v32si") (V32DF "v32si") + (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si") (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")]) (define_mode_attr VnSI - [(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI") + [(V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI") + (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI") + (V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI") + (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI") + (V8QI "V8SI") (V8HI "V8SI") (V8HF "V8SI") (V8SI "V8SI") + (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI") + (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI") + (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI") + (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI") + (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI") + (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI") (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")]) (define_mode_attr vndi - [(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di") + [(V2QI "v2di") (V2HI "v2di") (V2HF "v2di") (V2SI "v2di") + (V2SF "v2di") (V2DI "v2di") (V2DF "v2di") + (V4QI "v4di") (V4HI "v4di") (V4HF "v4di") (V4SI "v4di") + (V4SF "v4di") (V4DI "v4di") (V4DF "v4di") + (V8QI "v8di") (V8HI "v8di") (V8HF "v8di") (V8SI "v8di") + (V8SF "v8di") (V8DI "v8di") (V8DF "v8di") + (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di") + (V16SF "v16di") (V16DI "v16di") (V16DF "v16di") + (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di") + (V32SF "v32di") (V32DI "v32di") (V32DF "v32di") + (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") 
(V64SI "v64di") (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")]) (define_mode_attr VnDI - [(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI") + [(V2QI "V2DI") (V2HI "V2DI") (V2HF "V2DI") (V2SI "V2DI") + (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI") + (V4QI "V4DI") (V4HI "V4DI") (V4HF "V4DI") (V4SI "V4DI") + (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI") + (V8QI "V8DI") (V8HI "V8DI") (V8HF "V8DI") (V8SI "V8DI") + (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI") + (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI") + (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI") + (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI") + (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI") + (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI") (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")]) -(define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")]) +(define_mode_attr sdwa + [(V2QI "BYTE_0") (V2HI "WORD_0") (V2SI "DWORD") + (V4QI "BYTE_0") (V4HI "WORD_0") (V4SI "DWORD") + (V8QI "BYTE_0") (V8HI "WORD_0") (V8SI "DWORD") + (V16QI "BYTE_0") (V16HI "WORD_0") (V16SI "DWORD") + (V32QI "BYTE_0") (V32HI "WORD_0") (V32SI "DWORD") + (V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")]) ;; }}} ;; {{{ Substitutions @@ -180,6 +335,37 @@ (match_operand:V_ALL 1 "general_operand"))] "" { + /* Bitwise reinterpret casts via SUBREG don't work with GCN vector + registers, but we can convert the MEM to a mode that does work. 
*/ + if (MEM_P (operands[0]) && !SUBREG_P (operands[0]) + && SUBREG_P (operands[1]) + && GET_MODE_SIZE (GET_MODE (operands[1])) + == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1])))) + { + rtx src = SUBREG_REG (operands[1]); + rtx mem = copy_rtx (operands[0]); + PUT_MODE_RAW (mem, GET_MODE (src)); + emit_move_insn (mem, src); + DONE; + } + if (MEM_P (operands[1]) && !SUBREG_P (operands[1]) + && SUBREG_P (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) + == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0])))) + { + rtx dest = SUBREG_REG (operands[0]); + rtx mem = copy_rtx (operands[1]); + PUT_MODE_RAW (mem, GET_MODE (dest)); + emit_move_insn (dest, mem); + DONE; + } + + /* SUBREG of MEM is not supported. */ + gcc_assert ((!SUBREG_P (operands[0]) + || !MEM_P (SUBREG_REG (operands[0]))) + && (!SUBREG_P (operands[1]) + || !MEM_P (SUBREG_REG (operands[1])))); + if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed) { operands[1] = force_reg (<MODE>mode, operands[1]); @@ -622,6 +808,40 @@ (set_attr "exec" "none") (set_attr "laneselect" "yes")]) +(define_expand "vec_extract<V_ALL:mode><V_ALL_ALT:mode>" + [(set (match_operand:V_ALL_ALT 0 "register_operand") + (vec_select:V_ALL_ALT + (match_operand:V_ALL 1 "register_operand") + (parallel [(match_operand 2 "immediate_operand")])))] + "MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode) + && <V_ALL_ALT:SCALAR_MODE>mode == <V_ALL:SCALAR_MODE>mode" + { + int numlanes = GET_MODE_NUNITS (<V_ALL_ALT:MODE>mode); + int firstlane = INTVAL (operands[2]) * numlanes; + rtx tmp; + + if (firstlane == 0) + { + /* A plain move will do. */ + tmp = operands[1]; + } else { + /* FIXME: optimize this by using DPP where available. 
*/ + + rtx permutation = gen_reg_rtx (<V_ALL:VnSI>mode); + emit_insn (gen_vec_series<V_ALL:vnsi> (permutation, + GEN_INT (firstlane*4), + GEN_INT (4))); + + tmp = gen_reg_rtx (<V_ALL:MODE>mode); + emit_insn (gen_ds_bpermute<V_ALL:mode> (tmp, permutation, operands[1], + get_exec (<V_ALL:MODE>mode))); + } + + emit_move_insn (operands[0], + gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0)); + DONE; + }) + (define_expand "extract_last_<mode>" [(match_operand:<SCALAR_MODE> 0 "register_operand") (match_operand:DI 1 "gcn_alu_operand") @@ -673,6 +893,16 @@ DONE; }) +(define_expand "vec_init<V_ALL:mode><V_ALL_ALT:mode>" + [(match_operand:V_ALL 0 "register_operand") + (match_operand:V_ALL_ALT 1)] + "<V_ALL:SCALAR_MODE>mode == <V_ALL_ALT:SCALAR_MODE>mode + && MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)" + { + gcn_expand_vector_init (operands[0], operands[1]); + DONE; + }) + ;; }}} ;; {{{ Scatter / Gather @@ -2161,6 +2391,19 @@ (set_attr "length" "8,8")]) ;; }}} +;; {{{ Int unops + +(define_expand "neg<mode>2" + [(match_operand:V_INT 0 "register_operand") + (match_operand:V_INT 1 "register_operand")] + "" + { + emit_insn (gen_sub<mode>3 (operands[0], gcn_vec_constant (<MODE>mode, 0), + operands[1])); + DONE; + }) + +;; }}} ;; {{{ FP binops - special cases ; GCN does not directly provide a DFmode subtract instruction, so we do it by @@ -2419,10 +2662,10 @@ (set_attr "length" "8")]) (define_insn "ldexp<mode>3<exec>" - [(set (match_operand:V_FP 0 "register_operand" "=v") + [(set (match_operand:V_FP 0 "register_operand" "= v") (unspec:V_FP - [(match_operand:V_FP 1 "gcn_alu_operand" "vB") - (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")] + [(match_operand:V_FP 1 "gcn_alu_operand" " vB") + (match_operand:<VnSI> 2 "gcn_alu_operand" "vSvA")] UNSPEC_LDEXP))] "" "v_ldexp%i0\t%0, %1, %2" @@ -2452,8 +2695,8 @@ (set_attr "length" "8")]) (define_insn "frexp<mode>_exp2<exec>" - [(set (match_operand:V64SI 0 "register_operand" "=v") - (unspec:V64SI + [(set (match_operand:<VnSI> 
0 "register_operand" "=v") + (unspec:<VnSI> [(match_operand:V_FP 1 "gcn_alu_operand" "vB")] UNSPEC_FREXP_EXP))] "" @@ -2640,9 +2883,27 @@ (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF]) (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF]) -(define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF]) -(define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF]) -(define_mode_iterator VCVT_IMODE [V64HI V64SI]) +(define_mode_iterator VCVT_MODE + [V2HI V2SI V2HF V2SF V2DF + V4HI V4SI V4HF V4SF V4DF + V8HI V8SI V8HF V8SF V8DF + V16HI V16SI V16HF V16SF V16DF + V32HI V32SI V32HF V32SF V32DF + V64HI V64SI V64HF V64SF V64DF]) +(define_mode_iterator VCVT_FMODE + [V2HF V2SF V2DF + V4HF V4SF V4DF + V8HF V8SF V8DF + V16HF V16SF V16DF + V32HF V32SF V32DF + V64HF V64SF V64DF]) +(define_mode_iterator VCVT_IMODE + [V2HI V2SI + V4HI V4SI + V8HI V8SI + V16HI V16SI + V32HI V32SI + V64HI V64SI]) (define_code_iterator cvt_op [fix unsigned_fix float unsigned_float @@ -2669,8 +2930,9 @@ [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v") (cvt_op:VCVT_FMODE (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))] - "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode, - <cvt_name>_cvt)" + "MODE_VF (<VCVT_MODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode) + && gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode, + <cvt_name>_cvt)" "v_cvt<cvt_operands>\t%0, %1" [(set_attr "type" "vop1") (set_attr "length" "8")]) @@ -2679,8 +2941,9 @@ [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v") (cvt_op:VCVT_IMODE (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))] - "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode, - <cvt_name>_cvt)" + "MODE_VF (<VCVT_IMODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode) + && gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode, + <cvt_name>_cvt)" "v_cvt<cvt_operands>\t%0, %1" [(set_attr "type" "vop1") (set_attr "length" "8")]) @@ -3265,7 +3528,7 @@ (define_expand "reduc_<reduc_op>_scal_<mode>" [(set 
(match_operand:<SCALAR_MODE> 0 "register_operand") (unspec:<SCALAR_MODE> - [(match_operand:V_ALL 1 "register_operand")] + [(match_operand:V64_ALL 1 "register_operand")] REDUC_UNSPEC))] "" { @@ -3284,7 +3547,7 @@ (define_expand "fold_left_plus_<mode>" [(match_operand:<SCALAR_MODE> 0 "register_operand") (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand") - (match_operand:V_FP 2 "gcn_alu_operand")] + (match_operand:V64_FP 2 "gcn_alu_operand")] "can_create_pseudo_p () && (flag_openacc || flag_openmp || flag_associative_math)" @@ -3300,11 +3563,11 @@ }) (define_insn "*<reduc_op>_dpp_shr_<mode>" - [(set (match_operand:V_1REG 0 "register_operand" "=v") - (unspec:V_1REG - [(match_operand:V_1REG 1 "register_operand" "v") - (match_operand:V_1REG 2 "register_operand" "v") - (match_operand:SI 3 "const_int_operand" "n")] + [(set (match_operand:V64_1REG 0 "register_operand" "=v") + (unspec:V64_1REG + [(match_operand:V64_1REG 1 "register_operand" "v") + (match_operand:V64_1REG 2 "register_operand" "v") + (match_operand:SI 3 "const_int_operand" "n")] REDUC_UNSPEC))] ; GCN3 requires a carry out, GCN5 not "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode) @@ -3317,11 +3580,11 @@ (set_attr "length" "8")]) (define_insn_and_split "*<reduc_op>_dpp_shr_<mode>" - [(set (match_operand:V_DI 0 "register_operand" "=v") - (unspec:V_DI - [(match_operand:V_DI 1 "register_operand" "v") - (match_operand:V_DI 2 "register_operand" "v") - (match_operand:SI 3 "const_int_operand" "n")] + [(set (match_operand:V64_DI 0 "register_operand" "=v") + (unspec:V64_DI + [(match_operand:V64_DI 1 "register_operand" "v") + (match_operand:V64_DI 2 "register_operand" "v") + (match_operand:SI 3 "const_int_operand" "n")] REDUC_2REG_UNSPEC))] "" "#" @@ -3346,10 +3609,10 @@ ; Special cases for addition. 
(define_insn "*plus_carry_dpp_shr_<mode>" - [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") - (unspec:V_INT_1REG - [(match_operand:V_INT_1REG 1 "register_operand" "v") - (match_operand:V_INT_1REG 2 "register_operand" "v") + [(set (match_operand:V64_INT_1REG 0 "register_operand" "=v") + (unspec:V64_INT_1REG + [(match_operand:V64_INT_1REG 1 "register_operand" "v") + (match_operand:V64_INT_1REG 2 "register_operand" "v") (match_operand:SI 3 "const_int_operand" "n")] UNSPEC_PLUS_CARRY_DPP_SHR)) (clobber (reg:DI VCC_REG))] @@ -3363,12 +3626,12 @@ (set_attr "length" "8")]) (define_insn "*plus_carry_in_dpp_shr_<mode>" - [(set (match_operand:V_SI 0 "register_operand" "=v") - (unspec:V_SI - [(match_operand:V_SI 1 "register_operand" "v") - (match_operand:V_SI 2 "register_operand" "v") - (match_operand:SI 3 "const_int_operand" "n") - (match_operand:DI 4 "register_operand" "cV")] + [(set (match_operand:V64_SI 0 "register_operand" "=v") + (unspec:V64_SI + [(match_operand:V64_SI 1 "register_operand" "v") + (match_operand:V64_SI 2 "register_operand" "v") + (match_operand:SI 3 "const_int_operand" "n") + (match_operand:DI 4 "register_operand" "cV")] UNSPEC_PLUS_CARRY_IN_DPP_SHR)) (clobber (reg:DI VCC_REG))] "" @@ -3381,11 +3644,11 @@ (set_attr "length" "8")]) (define_insn_and_split "*plus_carry_dpp_shr_<mode>" - [(set (match_operand:V_DI 0 "register_operand" "=v") - (unspec:V_DI - [(match_operand:V_DI 1 "register_operand" "v") - (match_operand:V_DI 2 "register_operand" "v") - (match_operand:SI 3 "const_int_operand" "n")] + [(set (match_operand:V64_DI 0 "register_operand" "=v") + (unspec:V64_DI + [(match_operand:V64_DI 1 "register_operand" "v") + (match_operand:V64_DI 2 "register_operand" "v") + (match_operand:SI 3 "const_int_operand" "n")] UNSPEC_PLUS_CARRY_DPP_SHR)) (clobber (reg:DI VCC_REG))] "" @@ -3416,7 +3679,7 @@ (define_insn "mov_from_lane63_<mode>" [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v") (unspec:<SCALAR_MODE> - [(match_operand:V_1REG 1 
"register_operand" " v,v")] + [(match_operand:V64_1REG 1 "register_operand" " v,v")] UNSPEC_MOV_FROM_LANE63))] "" "@ @@ -3429,7 +3692,7 @@ (define_insn "mov_from_lane63_<mode>" [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v") (unspec:<SCALAR_MODE> - [(match_operand:V_2REG 1 "register_operand" " v,v")] + [(match_operand:V64_2REG 1 "register_operand" " v,v")] UNSPEC_MOV_FROM_LANE63))] "" "@ diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index c27ee91..3dc294c 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -395,6 +395,97 @@ gcn_scalar_mode_supported_p (scalar_mode mode) || mode == TImode); } +/* Return a vector mode with N lanes of MODE. */ + +static machine_mode +VnMODE (int n, machine_mode mode) +{ + switch (mode) + { + case QImode: + switch (n) + { + case 2: return V2QImode; + case 4: return V4QImode; + case 8: return V8QImode; + case 16: return V16QImode; + case 32: return V32QImode; + case 64: return V64QImode; + } + break; + case HImode: + switch (n) + { + case 2: return V2HImode; + case 4: return V4HImode; + case 8: return V8HImode; + case 16: return V16HImode; + case 32: return V32HImode; + case 64: return V64HImode; + } + break; + case HFmode: + switch (n) + { + case 2: return V2HFmode; + case 4: return V4HFmode; + case 8: return V8HFmode; + case 16: return V16HFmode; + case 32: return V32HFmode; + case 64: return V64HFmode; + } + break; + case SImode: + switch (n) + { + case 2: return V2SImode; + case 4: return V4SImode; + case 8: return V8SImode; + case 16: return V16SImode; + case 32: return V32SImode; + case 64: return V64SImode; + } + break; + case SFmode: + switch (n) + { + case 2: return V2SFmode; + case 4: return V4SFmode; + case 8: return V8SFmode; + case 16: return V16SFmode; + case 32: return V32SFmode; + case 64: return V64SFmode; + } + break; + case DImode: + switch (n) + { + case 2: return V2DImode; + case 4: return V4DImode; + case 8: return V8DImode; + case 16: return V16DImode; + case 32: return 
V32DImode; + case 64: return V64DImode; + } + break; + case DFmode: + switch (n) + { + case 2: return V2DFmode; + case 4: return V4DFmode; + case 8: return V8DFmode; + case 16: return V16DFmode; + case 32: return V32DFmode; + case 64: return V64DFmode; + } + break; + default: + break; + } + + return VOIDmode; +} + /* Implement TARGET_CLASS_MAX_NREGS. Return the number of hard registers needed to hold a value of MODE in @@ -556,6 +647,23 @@ gcn_can_change_mode_class (machine_mode from, machine_mode to, { if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to)) return true; + + /* Vector conversions are only valid when changing mode with a fixed number + of lanes, or changing number of lanes with a fixed mode. Anything else + would require actual data movement. */ + if (VECTOR_MODE_P (from) && VECTOR_MODE_P (to) + && GET_MODE_NUNITS (from) != GET_MODE_NUNITS (to) + && GET_MODE_INNER (from) != GET_MODE_INNER (to)) + return false; + + /* Vector/scalar conversions are only permitted when the scalar mode + is the same or smaller than the inner vector mode. */ + if ((VECTOR_MODE_P (from) && !VECTOR_MODE_P (to) + && GET_MODE_SIZE (to) >= GET_MODE_SIZE (GET_MODE_INNER (from))) + || (VECTOR_MODE_P (to) && !VECTOR_MODE_P (from) + && GET_MODE_SIZE (from) >= GET_MODE_SIZE (GET_MODE_INNER (to)))) + return false; + return (gcn_class_max_nregs (regclass, from) == gcn_class_max_nregs (regclass, to)); } @@ -595,6 +703,16 @@ gcn_class_likely_spilled_p (reg_class_t rclass) bool gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2) { + if (VECTOR_MODE_P (mode1) || VECTOR_MODE_P (mode2)) + { + int vf1 = (VECTOR_MODE_P (mode1) ? GET_MODE_NUNITS (mode1) : 1); + int vf2 = (VECTOR_MODE_P (mode2) ? GET_MODE_NUNITS (mode2) : 1); + machine_mode inner1 = (vf1 > 1 ? GET_MODE_INNER (mode1) : mode1); + machine_mode inner2 = (vf2 > 1 ? 
GET_MODE_INNER (mode2) : mode2); + + return (vf1 == vf2 || (inner1 == inner2 && vf2 <= vf1)); + } + return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE); } @@ -616,14 +734,16 @@ gcn_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) rtx gcn_operand_part (machine_mode mode, rtx op, int n) { - if (GET_MODE_SIZE (mode) >= 256) + int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1; + + if (vf > 1) { - /*gcc_assert (GET_MODE_SIZE (mode) == 256 || n == 0); */ + machine_mode vsimode = VnMODE (vf, SImode); if (REG_P (op)) { gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER); - return gen_rtx_REG (V64SImode, REGNO (op) + n); + return gen_rtx_REG (vsimode, REGNO (op) + n); } if (GET_CODE (op) == CONST_VECTOR) { @@ -634,10 +754,10 @@ gcn_operand_part (machine_mode mode, rtx op, int n) RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode), CONST_VECTOR_ELT (op, i), n); - return gen_rtx_CONST_VECTOR (V64SImode, v); + return gen_rtx_CONST_VECTOR (vsimode, v); } if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR) - return gcn_gen_undef (V64SImode); + return gcn_gen_undef (vsimode); gcc_unreachable (); } else if (GET_MODE_SIZE (mode) == 8 && REG_P (op)) @@ -726,7 +846,7 @@ gcn_ira_change_pseudo_allocno_class (int regno, reg_class_t cl, /* Create a new DImode pseudo reg and emit an instruction to initialize it to VAL. */ -static rtx +rtx get_exec (int64_t val) { rtx reg = gen_reg_rtx (DImode); @@ -734,36 +854,11 @@ get_exec (int64_t val) return reg; } -/* Return value of scalar exec register. */ - rtx -gcn_scalar_exec () +get_exec (machine_mode mode) { - return const1_rtx; -} - -/* Return pseudo holding scalar exec register. */ - -rtx -gcn_scalar_exec_reg () -{ - return get_exec (1); -} - -/* Return value of full exec register. */ - -rtx -gcn_full_exec () -{ - return constm1_rtx; -} - -/* Return pseudo holding full exec register. 
*/ - -rtx -gcn_full_exec_reg () -{ - return get_exec (-1); + int vf = (VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1); + return get_exec (0xffffffffffffffffUL >> (64-vf)); } /* }}} */ @@ -802,8 +897,13 @@ int gcn_inline_fp_constant_p (rtx x, bool allow_vector) { machine_mode mode = GET_MODE (x); + int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1; + + if (vf > 1) + mode = GET_MODE_INNER (mode); - if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode) + if (vf > 1 + && (mode == HFmode || mode == SFmode || mode == DFmode) && allow_vector) { int n; @@ -812,7 +912,7 @@ gcn_inline_fp_constant_p (rtx x, bool allow_vector) n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false); if (!n) return 0; - for (int i = 1; i < 64; i++) + for (int i = 1; i < vf; i++) if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) return 0; return 1; @@ -867,8 +967,13 @@ bool gcn_fp_constant_p (rtx x, bool allow_vector) { machine_mode mode = GET_MODE (x); + int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1; - if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode) + if (vf > 1) + mode = GET_MODE_INNER (mode); + + if (vf > 1 + && (mode == HFmode || mode == SFmode || mode == DFmode) && allow_vector) { int n; @@ -877,7 +982,7 @@ gcn_fp_constant_p (rtx x, bool allow_vector) n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false); if (!n) return false; - for (int i = 1; i < 64; i++) + for (int i = 1; i < vf; i++) if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) return false; return true; @@ -1091,6 +1196,249 @@ gcn_gen_undef (machine_mode mode) } /* }}} */ +/* {{{ Utility functions. */ + +/* Generalised accessor functions for instruction patterns. + The machine desription '@' prefix does something similar, but as of + GCC 10 is incompatible with define_subst, and anyway it doesn't + auto-handle the exec feature. 
+ + Four macros are provided; each function only needs one: + + GEN_VN - create accessor functions for all sizes of one mode + GEN_VNM - create accessor functions for all sizes of all modes + GEN_VN_NOEXEC - for insns without "_exec" variants + GEN_VNM_NOEXEC - likewise + + E.g. add<mode>3 + GEN_VNM (add, 3, A(rtx dest, rtx s1, rtx s2), A(dest, s1, s2) + + gen_addvNsi3 (dst, a, b) + -> calls gen_addv64si3, or gen_addv32si3, etc. + + gen_addvNm3 (dst, a, b) + -> calls gen_addv64qi3, or gen_addv2di3, etc. + + The mode is determined from the first parameter, which must be called + "dest" (or else the macro doesn't work). + + Each function has two optional parameters at the end: merge_src and exec. + If exec is non-null, the function will call the "_exec" variant of the + insn. If exec is non-null but merge_src is null then an undef unspec + will be created. + + E.g. cont. + gen_addvNsi3 (v64sidst, a, b, oldval, exec) + -> calls gen_addv64si3_exec (v64sidst, a, b, oldval, exec) + + gen_addvNm3 (v2qidst, a, b, NULL, exec) + -> calls gen_addv2qi3_exec (v2qidst, a, b, + gcn_gen_undef (V2QImode), exec) + */ + +#define A(...) 
__VA_ARGS__ +#define GEN_VN_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \ +static rtx \ +gen_##PREFIX##vN##SUFFIX (PARAMS) \ +{ \ + machine_mode mode = GET_MODE (dest); \ + int n = GET_MODE_NUNITS (mode); \ + \ + switch (n) \ + { \ + case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \ + case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \ + case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \ + case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \ + case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \ + case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \ + } \ + \ + gcc_unreachable (); \ + return NULL_RTX; \ +} + +#define GEN_VNM_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \ +GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \ +static rtx \ +gen_##PREFIX##vNm##SUFFIX (PARAMS) \ +{ \ + machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \ + \ + switch (mode) \ + { \ + case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS); \ + case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS); \ + case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS); \ + case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS); \ + case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS); \ + case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS); \ + case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS); \ + default: \ + break; \ + } \ + \ + gcc_unreachable (); \ + return NULL_RTX; \ +} + +#define GEN_VN(PREFIX, SUFFIX, PARAMS, ARGS) \ +static rtx \ +gen_##PREFIX##vN##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \ +{ \ + machine_mode mode = GET_MODE (dest); \ + int n = GET_MODE_NUNITS (mode); \ + \ + if (exec && !merge_src) \ + merge_src = gcn_gen_undef 
(mode); \ + \ + if (exec) \ + switch (n) \ + { \ + case 2: return gen_##PREFIX##v2##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 4: return gen_##PREFIX##v4##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 8: return gen_##PREFIX##v8##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 16: return gen_##PREFIX##v16##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 32: return gen_##PREFIX##v32##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 64: return gen_##PREFIX##v64##SUFFIX##_exec (ARGS, merge_src, exec); \ + } \ + else \ + switch (n) \ + { \ + case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \ + case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \ + case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \ + case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \ + case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \ + case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \ + } \ + \ + gcc_unreachable (); \ + return NULL_RTX; \ +} + +#define GEN_VNM(PREFIX, SUFFIX, PARAMS, ARGS) \ +GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \ +static rtx \ +gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \ +{ \ + machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \ + \ + switch (mode) \ + { \ + case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec); \ + case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec); \ + case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec); \ + case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \ + case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \ + case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \ + case E_DFmode: 
return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \ + default: \ + break; \ + } \ + \ + gcc_unreachable (); \ + return NULL_RTX; \ +} + +GEN_VNM (add,3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (add,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (add,si3_vcc_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc), + A(dest, src1, src2, vcc)) +GEN_VN (add,di3_sext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (add,di3_vcc_zext_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc), + A(dest, src1, src2, vcc)) +GEN_VN (add,di3_zext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (add,di3_vcc_zext_dup2, A(rtx dest, rtx src1, rtx src2, rtx vcc), + A(dest, src1, src2, vcc)) +GEN_VN (addc,si3, A(rtx dest, rtx src1, rtx src2, rtx vccout, rtx vccin), + A(dest, src1, src2, vccout, vccin)) +GEN_VN (and,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (ashl,si3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift)) +GEN_VNM_NOEXEC (ds_bpermute,, A(rtx dest, rtx addr, rtx src, rtx exec), + A(dest, addr, src, exec)) +GEN_VNM (gather,_expr, A(rtx dest, rtx addr, rtx as, rtx vol), + A(dest, addr, as, vol)) +GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src)) +GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (sub,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src)) +GEN_VN_NOEXEC (vec_series,si, A(rtx dest, rtx x, rtx c), A(dest, x, c)) + +#undef GEN_VNM +#undef GEN_VN +#undef GET_VN_FN +#undef A + +/* Get icode for vector instructions without an optab. 
*/ + +#define CODE_FOR(PREFIX, SUFFIX) \ +static int \ +get_code_for_##PREFIX##vN##SUFFIX (int nunits) \ +{ \ + switch (nunits) \ + { \ + case 2: return CODE_FOR_##PREFIX##v2##SUFFIX; \ + case 4: return CODE_FOR_##PREFIX##v4##SUFFIX; \ + case 8: return CODE_FOR_##PREFIX##v8##SUFFIX; \ + case 16: return CODE_FOR_##PREFIX##v16##SUFFIX; \ + case 32: return CODE_FOR_##PREFIX##v32##SUFFIX; \ + case 64: return CODE_FOR_##PREFIX##v64##SUFFIX; \ + } \ + \ + gcc_unreachable (); \ + return CODE_FOR_nothing; \ +} + +#define CODE_FOR_OP(PREFIX) \ + CODE_FOR (PREFIX, qi) \ + CODE_FOR (PREFIX, hi) \ + CODE_FOR (PREFIX, hf) \ + CODE_FOR (PREFIX, si) \ + CODE_FOR (PREFIX, sf) \ + CODE_FOR (PREFIX, di) \ + CODE_FOR (PREFIX, df) \ +static int \ +get_code_for_##PREFIX (machine_mode mode) \ +{ \ + int vf = GET_MODE_NUNITS (mode); \ + machine_mode smode = GET_MODE_INNER (mode); \ + \ + switch (smode) \ + { \ + case E_QImode: return get_code_for_##PREFIX##vNqi (vf); \ + case E_HImode: return get_code_for_##PREFIX##vNhi (vf); \ + case E_HFmode: return get_code_for_##PREFIX##vNhf (vf); \ + case E_SImode: return get_code_for_##PREFIX##vNsi (vf); \ + case E_SFmode: return get_code_for_##PREFIX##vNsf (vf); \ + case E_DImode: return get_code_for_##PREFIX##vNdi (vf); \ + case E_DFmode: return get_code_for_##PREFIX##vNdf (vf); \ + default: break; \ + } \ + \ + gcc_unreachable (); \ + return CODE_FOR_nothing; \ +} + +CODE_FOR_OP (reload_in) +CODE_FOR_OP (reload_out) + +#undef CODE_FOR_OP +#undef CODE_FOR + +/* }}} */ /* {{{ Addresses, pointers and moves. */ /* Return true is REG is a valid place to store a pointer, @@ -1644,103 +1992,152 @@ regno_ok_for_index_p (int regno) return regno == M0_REG || VGPR_REGNO_P (regno); } -/* Generate move which uses the exec flags. If EXEC is NULL, then it is - assumed that all lanes normally relevant to the mode of the move are - affected. If PREV is NULL, then a sensible default is supplied for - the inactive lanes. */ +/* Expand vector init of OP0 by VEC. 
+ Implements vec_init instruction pattern. */ -static rtx -gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL, rtx prev = NULL) +void +gcn_expand_vector_init (rtx op0, rtx vec) { + rtx val[64]; machine_mode mode = GET_MODE (op0); + int vf = GET_MODE_NUNITS (mode); + machine_mode addrmode = VnMODE (vf, DImode); + machine_mode offsetmode = VnMODE (vf, SImode); - if (vgpr_vector_mode_p (mode)) - { - if (exec && exec != CONSTM1_RTX (DImode)) - { - if (!prev) - prev = op0; - } - else - { - if (!prev) - prev = gcn_gen_undef (mode); - exec = gcn_full_exec_reg (); - } + int64_t mem_mask = 0; + int64_t item_mask[64]; + rtx ramp = gen_reg_rtx (offsetmode); + rtx addr = gen_reg_rtx (addrmode); - rtx set = gen_rtx_SET (op0, gen_rtx_VEC_MERGE (mode, op1, prev, exec)); + int unit_size = GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op0))); + emit_insn (gen_mulvNsi3_dup (ramp, gen_rtx_REG (offsetmode, VGPR_REGNO (1)), + GEN_INT (unit_size))); - return gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (2, set, - gen_rtx_CLOBBER (VOIDmode, - gen_rtx_SCRATCH (V64DImode)))); - } + bool simple_repeat = true; - return (gen_rtx_PARALLEL - (VOIDmode, - gen_rtvec (2, gen_rtx_SET (op0, op1), - gen_rtx_USE (VOIDmode, - exec ? exec : gcn_scalar_exec ())))); -} + /* Expand nested vectors into one vector. */ + int item_count = XVECLEN (vec, 0); + for (int i = 0, j = 0; i < item_count; i++) + { + rtx item = XVECEXP (vec, 0, i); + machine_mode mode = GET_MODE (item); + int units = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1; + item_mask[j] = (((uint64_t)-1)>>(64-units)) << j; -/* Generate masked move. 
*/ + if (simple_repeat && i != 0) + simple_repeat = item == XVECEXP (vec, 0, i-1); -static rtx -gen_duplicate_load (rtx op0, rtx op1, rtx op2 = NULL, rtx exec = NULL) -{ - if (exec) - return (gen_rtx_SET (op0, - gen_rtx_VEC_MERGE (GET_MODE (op0), - gen_rtx_VEC_DUPLICATE (GET_MODE - (op0), op1), - op2, exec))); - else - return (gen_rtx_SET (op0, gen_rtx_VEC_DUPLICATE (GET_MODE (op0), op1))); -} + /* If its a vector of values then copy them into the final location. */ + if (GET_CODE (item) == CONST_VECTOR) + { + for (int k = 0; k < units; k++) + val[j++] = XVECEXP (item, 0, k); + continue; + } + /* Otherwise, we have a scalar or an expression that expands... */ -/* Expand vector init of OP0 by VEC. - Implements vec_init instruction pattern. */ + if (MEM_P (item)) + { + rtx base = XEXP (item, 0); + if (MEM_ADDR_SPACE (item) == DEFAULT_ADDR_SPACE + && REG_P (base)) + { + /* We have a simple vector load. We can put the addresses in + the vector, combine it with any other such MEMs, and load it + all with a single gather at the end. */ + int64_t mask = ((0xffffffffffffffffUL + >> (64-GET_MODE_NUNITS (mode))) + << j); + rtx exec = get_exec (mask); + emit_insn (gen_subvNsi3 + (ramp, ramp, + gcn_vec_constant (offsetmode, j*unit_size), + ramp, exec)); + emit_insn (gen_addvNdi3_zext_dup2 + (addr, ramp, base, + (mem_mask ? addr : gcn_gen_undef (addrmode)), + exec)); + mem_mask |= mask; + } + else + /* The MEM is non-trivial, so let's load it independently. */ + item = force_reg (mode, item); + } + else if (!CONST_INT_P (item) && !CONST_DOUBLE_P (item)) + /* The item may be a symbol_ref, or something else non-trivial. */ + item = force_reg (mode, item); + + /* Duplicate the vector across each item. + It is either a smaller vector register that needs shifting, + or a MEM that needs loading. 
*/ + val[j] = item; + j += units; + } -void -gcn_expand_vector_init (rtx op0, rtx vec) -{ int64_t initialized_mask = 0; - int64_t curr_mask = 1; - machine_mode mode = GET_MODE (op0); + rtx prev = NULL; - rtx val = XVECEXP (vec, 0, 0); - - for (int i = 1; i < 64; i++) - if (rtx_equal_p (val, XVECEXP (vec, 0, i))) - curr_mask |= (int64_t) 1 << i; + if (mem_mask) + { + emit_insn (gen_gathervNm_expr + (op0, gen_rtx_PLUS (addrmode, addr, + gen_rtx_VEC_DUPLICATE (addrmode, + const0_rtx)), + GEN_INT (DEFAULT_ADDR_SPACE), GEN_INT (0), + NULL, get_exec (mem_mask))); + prev = op0; + initialized_mask = mem_mask; + } - if (gcn_constant_p (val)) - emit_move_insn (op0, gcn_vec_constant (mode, val)); - else + if (simple_repeat && item_count > 1 && !prev) { - val = force_reg (GET_MODE_INNER (mode), val); - emit_insn (gen_duplicate_load (op0, val)); + /* Special case for instances of {A, B, A, B, A, B, ....}, etc. */ + rtx src = gen_rtx_SUBREG (mode, val[0], 0); + rtx input_vf_mask = GEN_INT (GET_MODE_NUNITS (GET_MODE (val[0]))-1); + + rtx permutation = gen_reg_rtx (VnMODE (vf, SImode)); + emit_insn (gen_vec_seriesvNsi (permutation, GEN_INT (0), GEN_INT (1))); + rtx mask_dup = gen_reg_rtx (VnMODE (vf, SImode)); + emit_insn (gen_vec_duplicatevNsi (mask_dup, input_vf_mask)); + emit_insn (gen_andvNsi3 (permutation, permutation, mask_dup)); + emit_insn (gen_ashlvNsi3 (permutation, permutation, GEN_INT (2))); + emit_insn (gen_ds_bpermutevNm (op0, permutation, src, get_exec (mode))); + return; } - initialized_mask |= curr_mask; - for (int i = 1; i < 64; i++) + + /* Write each value, elementwise, but coalesce matching values into one + instruction, where possible. 
*/ + for (int i = 0; i < vf; i++) if (!(initialized_mask & ((int64_t) 1 << i))) { - curr_mask = (int64_t) 1 << i; - rtx val = XVECEXP (vec, 0, i); - - for (int j = i + 1; j < 64; j++) - if (rtx_equal_p (val, XVECEXP (vec, 0, j))) - curr_mask |= (int64_t) 1 << j; - if (gcn_constant_p (val)) - emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val), - get_exec (curr_mask))); + if (gcn_constant_p (val[i])) + emit_insn (gen_movvNm (op0, gcn_vec_constant (mode, val[i]), prev, + get_exec (item_mask[i]))); + else if (VECTOR_MODE_P (GET_MODE (val[i])) + && (GET_MODE_NUNITS (GET_MODE (val[i])) == vf + || i == 0)) + emit_insn (gen_movvNm (op0, gen_rtx_SUBREG (mode, val[i], 0), prev, + get_exec (item_mask[i]))); + else if (VECTOR_MODE_P (GET_MODE (val[i]))) + { + rtx permutation = gen_reg_rtx (VnMODE (vf, SImode)); + emit_insn (gen_vec_seriesvNsi (permutation, GEN_INT (-i*4), + GEN_INT (4))); + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_ds_bpermutevNm (tmp, permutation, + gen_rtx_SUBREG (mode, val[i], 0), + get_exec (-1))); + emit_insn (gen_movvNm (op0, tmp, prev, get_exec (item_mask[i]))); + } else { - val = force_reg (GET_MODE_INNER (mode), val); - emit_insn (gen_duplicate_load (op0, val, op0, - get_exec (curr_mask))); + rtx reg = force_reg (GET_MODE_INNER (mode), val[i]); + emit_insn (gen_vec_duplicatevNm (op0, reg, prev, + get_exec (item_mask[i]))); } - initialized_mask |= curr_mask; + + initialized_mask |= item_mask[i]; + prev = op0; } } @@ -1751,18 +2148,18 @@ strided_constant (machine_mode mode, int base, int val) { rtx x = gen_reg_rtx (mode); emit_move_insn (x, gcn_vec_constant (mode, base)); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 32), - x, get_exec (0xffffffff00000000))); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 16), - x, get_exec (0xffff0000ffff0000))); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 8), - x, get_exec (0xff00ff00ff00ff00))); - emit_insn (gen_addv64si3_exec (x, 
x, gcn_vec_constant (mode, val * 4), - x, get_exec (0xf0f0f0f0f0f0f0f0))); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 2), - x, get_exec (0xcccccccccccccccc))); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 1), - x, get_exec (0xaaaaaaaaaaaaaaaa))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 32), + x, get_exec (0xffffffff00000000))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 16), + x, get_exec (0xffff0000ffff0000))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 8), + x, get_exec (0xff00ff00ff00ff00))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 4), + x, get_exec (0xf0f0f0f0f0f0f0f0))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 2), + x, get_exec (0xcccccccccccccccc))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 1), + x, get_exec (0xaaaaaaaaaaaaaaaa))); return x; } @@ -1792,15 +2189,17 @@ gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode, case ADDR_SPACE_LDS: case ADDR_SPACE_GDS: /* FIXME: LDS support offsets, handle them!. 
*/ - if (vgpr_vector_mode_p (mode) && GET_MODE (x) != V64SImode) + if (vgpr_vector_mode_p (mode) + && GET_MODE_INNER (GET_MODE (x)) != SImode) { - rtx addrs = gen_reg_rtx (V64SImode); + machine_mode simode = VnMODE (GET_MODE_NUNITS (mode), SImode); + rtx addrs = gen_reg_rtx (simode); rtx base = force_reg (SImode, x); - rtx offsets = strided_constant (V64SImode, 0, + rtx offsets = strided_constant (simode, 0, GET_MODE_UNIT_SIZE (mode)); - emit_insn (gen_vec_duplicatev64si (addrs, base)); - emit_insn (gen_addv64si3 (addrs, offsets, addrs)); + emit_insn (gen_vec_duplicatevNsi (addrs, base)); + emit_insn (gen_addvNsi3 (addrs, offsets, addrs)); return addrs; } return x; @@ -1808,16 +2207,18 @@ gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode, gcc_unreachable (); } -/* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:V64DI)) with the +/* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:VnDI)) with the proper vector of stepped addresses. MEM will be a DImode address of a vector in an SGPR. - TMP will be a V64DImode VGPR pair or (scratch:V64DI). */ + TMP will be a VnDImode VGPR pair or (scratch:VnDI). */ rtx gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, rtx tmp) { + machine_mode pmode = VnMODE (GET_MODE_NUNITS (mode), DImode); + machine_mode offmode = VnMODE (GET_MODE_NUNITS (mode), SImode); gcc_assert (MEM_P (mem)); rtx mem_base = XEXP (mem, 0); rtx mem_index = NULL_RTX; @@ -1841,22 +2242,18 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, machine_mode inner = GET_MODE_INNER (mode); int shift = exact_log2 (GET_MODE_SIZE (inner)); - rtx ramp = gen_rtx_REG (V64SImode, VGPR_REGNO (1)); - rtx undef_v64si = gcn_gen_undef (V64SImode); + rtx ramp = gen_rtx_REG (offmode, VGPR_REGNO (1)); rtx new_base = NULL_RTX; addr_space_t as = MEM_ADDR_SPACE (mem); rtx tmplo = (REG_P (tmp) - ? gcn_operand_part (V64DImode, tmp, 0) - : gen_reg_rtx (V64SImode)); + ? 
gcn_operand_part (pmode, tmp, 0) + : gen_reg_rtx (offmode)); /* tmplo[:] = ramp[:] << shift */ - if (exec) - emit_insn (gen_ashlv64si3_exec (tmplo, ramp, - gen_int_mode (shift, SImode), - undef_v64si, exec)); - else - emit_insn (gen_ashlv64si3 (tmplo, ramp, gen_int_mode (shift, SImode))); + emit_insn (gen_ashlvNsi3 (tmplo, ramp, + gen_int_mode (shift, SImode), + NULL, exec)); if (AS_FLAT_P (as)) { @@ -1866,53 +2263,41 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, { rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0); rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1); - rtx tmphi = gcn_operand_part (V64DImode, tmp, 1); + rtx tmphi = gcn_operand_part (pmode, tmp, 1); /* tmphi[:] = mem_base_hi */ - if (exec) - emit_insn (gen_vec_duplicatev64si_exec (tmphi, mem_base_hi, - undef_v64si, exec)); - else - emit_insn (gen_vec_duplicatev64si (tmphi, mem_base_hi)); + emit_insn (gen_vec_duplicatevNsi (tmphi, mem_base_hi, NULL, exec)); /* tmp[:] += zext (mem_base) */ if (exec) { - emit_insn (gen_addv64si3_vcc_dup_exec (tmplo, mem_base_lo, tmplo, - vcc, undef_v64si, exec)); - emit_insn (gen_addcv64si3_exec (tmphi, tmphi, const0_rtx, - vcc, vcc, undef_v64si, exec)); + emit_insn (gen_addvNsi3_vcc_dup (tmplo, mem_base_lo, tmplo, + vcc, NULL, exec)); + emit_insn (gen_addcvNsi3 (tmphi, tmphi, const0_rtx, + vcc, vcc, NULL, exec)); } else - emit_insn (gen_addv64di3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc)); + emit_insn (gen_addvNdi3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc)); } else { - tmp = gen_reg_rtx (V64DImode); - if (exec) - emit_insn (gen_addv64di3_vcc_zext_dup2_exec - (tmp, tmplo, mem_base, vcc, gcn_gen_undef (V64DImode), - exec)); - else - emit_insn (gen_addv64di3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc)); + tmp = gen_reg_rtx (pmode); + emit_insn (gen_addvNdi3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc, + NULL, exec)); } new_base = tmp; } else if (AS_ANY_DS_P (as)) { - if (!exec) - emit_insn (gen_addv64si3_dup (tmplo, tmplo, 
mem_base)); - else - emit_insn (gen_addv64si3_dup_exec (tmplo, tmplo, mem_base, - gcn_gen_undef (V64SImode), exec)); + emit_insn (gen_addvNsi3_dup (tmplo, tmplo, mem_base, NULL, exec)); new_base = tmplo; } else { - mem_base = gen_rtx_VEC_DUPLICATE (V64DImode, mem_base); - new_base = gen_rtx_PLUS (V64DImode, mem_base, - gen_rtx_SIGN_EXTEND (V64DImode, tmplo)); + mem_base = gen_rtx_VEC_DUPLICATE (pmode, mem_base); + new_base = gen_rtx_PLUS (pmode, mem_base, + gen_rtx_SIGN_EXTEND (pmode, tmplo)); } return gen_rtx_PLUS (GET_MODE (new_base), new_base, @@ -1929,42 +2314,33 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, If EXEC is set then _exec patterns will be used, otherwise plain. Return values. - ADDR_SPACE_FLAT - return V64DImode vector of absolute addresses. - ADDR_SPACE_GLOBAL - return V64SImode vector of offsets. */ + ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses. + ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. */ rtx gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale, bool unsigned_p, rtx exec) { - rtx tmpsi = gen_reg_rtx (V64SImode); - rtx tmpdi = gen_reg_rtx (V64DImode); - rtx undefsi = exec ? gcn_gen_undef (V64SImode) : NULL; - rtx undefdi = exec ? gcn_gen_undef (V64DImode) : NULL; + int vf = GET_MODE_NUNITS (GET_MODE (offsets)); + rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode)); + rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode)); if (CONST_INT_P (scale) && INTVAL (scale) > 0 && exact_log2 (INTVAL (scale)) >= 0) - emit_insn (gen_ashlv64si3 (tmpsi, offsets, - GEN_INT (exact_log2 (INTVAL (scale))))); + emit_insn (gen_ashlvNsi3 (tmpsi, offsets, + GEN_INT (exact_log2 (INTVAL (scale))), + NULL, exec)); else - (exec - ? emit_insn (gen_mulv64si3_dup_exec (tmpsi, offsets, scale, undefsi, - exec)) - : emit_insn (gen_mulv64si3_dup (tmpsi, offsets, scale))); + emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec)); /* "Global" instructions do not support negative register offsets. 
*/ if (as == ADDR_SPACE_FLAT || !unsigned_p) { if (unsigned_p) - (exec - ? emit_insn (gen_addv64di3_zext_dup2_exec (tmpdi, tmpsi, base, - undefdi, exec)) - : emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base))); + emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec)); else - (exec - ? emit_insn (gen_addv64di3_sext_dup2_exec (tmpdi, tmpsi, base, - undefdi, exec)) - : emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base))); + emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec)); return tmpdi; } else if (as == ADDR_SPACE_GLOBAL) @@ -2065,59 +2441,9 @@ gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass, || GET_MODE_CLASS (reload_mode) == MODE_VECTOR_FLOAT) { if (in_p) - switch (reload_mode) - { - case E_V64SImode: - sri->icode = CODE_FOR_reload_inv64si; - break; - case E_V64SFmode: - sri->icode = CODE_FOR_reload_inv64sf; - break; - case E_V64HImode: - sri->icode = CODE_FOR_reload_inv64hi; - break; - case E_V64HFmode: - sri->icode = CODE_FOR_reload_inv64hf; - break; - case E_V64QImode: - sri->icode = CODE_FOR_reload_inv64qi; - break; - case E_V64DImode: - sri->icode = CODE_FOR_reload_inv64di; - break; - case E_V64DFmode: - sri->icode = CODE_FOR_reload_inv64df; - break; - default: - gcc_unreachable (); - } + sri->icode = get_code_for_reload_in (reload_mode); else - switch (reload_mode) - { - case E_V64SImode: - sri->icode = CODE_FOR_reload_outv64si; - break; - case E_V64SFmode: - sri->icode = CODE_FOR_reload_outv64sf; - break; - case E_V64HImode: - sri->icode = CODE_FOR_reload_outv64hi; - break; - case E_V64HFmode: - sri->icode = CODE_FOR_reload_outv64hf; - break; - case E_V64QImode: - sri->icode = CODE_FOR_reload_outv64qi; - break; - case E_V64DImode: - sri->icode = CODE_FOR_reload_outv64di; - break; - case E_V64DFmode: - sri->icode = CODE_FOR_reload_outv64df; - break; - default: - gcc_unreachable (); - } + sri->icode = get_code_for_reload_out (reload_mode); break; } /* Fallthrough. 
*/ @@ -3428,6 +3754,9 @@ gcn_valid_cvt_p (machine_mode from, machine_mode to, enum gcn_cvt_t op) if (VECTOR_MODE_P (from)) { + if (GET_MODE_NUNITS (from) != GET_MODE_NUNITS (to)) + return false; + from = GET_MODE_INNER (from); to = GET_MODE_INNER (to); } @@ -3926,7 +4255,7 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , rtx mem = gen_rtx_MEM (GET_MODE (target), addrs); /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */ /* FIXME: set attributes. */ - emit_insn (gen_mov_with_exec (target, mem, exec)); + emit_insn (gen_movvNm (target, mem, NULL, exec)); return target; } case GCN_BUILTIN_FLAT_STORE_PTR_INT32: @@ -3961,20 +4290,18 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , rtx mem = gen_rtx_MEM (vmode, addrs); /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */ /* FIXME: set attributes. */ - emit_insn (gen_mov_with_exec (mem, val, exec)); + emit_insn (gen_movvNm (mem, val, NULL, exec)); return target; } case GCN_BUILTIN_SQRTVF: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, EXPAND_NORMAL)); - emit_insn (gen_sqrtv64sf2_exec - (target, arg, gcn_gen_undef (V64SFmode), exec)); + emit_insn (gen_sqrtv64sf2 (target, arg)); return target; } case GCN_BUILTIN_SQRTF: @@ -3992,20 +4319,17 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, EXPAND_NORMAL)); - emit_insn (gen_absv64sf2_exec - (target, arg, gcn_gen_undef (V64SFmode), exec)); + emit_insn (gen_absv64sf2 (target, arg)); return target; } case GCN_BUILTIN_LDEXPVF: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg1 = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, @@ -4014,15 +4338,13 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , expand_expr 
(CALL_EXPR_ARG (exp, 1), NULL_RTX, V64SImode, EXPAND_NORMAL)); - emit_insn (gen_ldexpv64sf3_exec - (target, arg1, arg2, gcn_gen_undef (V64SFmode), exec)); + emit_insn (gen_ldexpv64sf3 (target, arg1, arg2)); return target; } case GCN_BUILTIN_LDEXPV: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg1 = force_reg (V64DFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, @@ -4031,60 +4353,51 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64SImode, EXPAND_NORMAL)); - emit_insn (gen_ldexpv64df3_exec - (target, arg1, arg2, gcn_gen_undef (V64DFmode), exec)); + emit_insn (gen_ldexpv64df3 (target, arg1, arg2)); return target; } case GCN_BUILTIN_FREXPVF_EXP: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, EXPAND_NORMAL)); - emit_insn (gen_frexpv64sf_exp2_exec - (target, arg, gcn_gen_undef (V64SImode), exec)); + emit_insn (gen_frexpv64sf_exp2 (target, arg)); return target; } case GCN_BUILTIN_FREXPVF_MANT: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, EXPAND_NORMAL)); - emit_insn (gen_frexpv64sf_mant2_exec - (target, arg, gcn_gen_undef (V64SFmode), exec)); + emit_insn (gen_frexpv64sf_mant2 (target, arg)); return target; } case GCN_BUILTIN_FREXPV_EXP: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64DFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64DFmode, EXPAND_NORMAL)); - emit_insn (gen_frexpv64df_exp2_exec - (target, arg, gcn_gen_undef (V64SImode), exec)); + emit_insn (gen_frexpv64df_exp2 (target, arg)); return target; } case GCN_BUILTIN_FREXPV_MANT: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64DFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64DFmode, EXPAND_NORMAL)); 
- emit_insn (gen_frexpv64df_mant2_exec - (target, arg, gcn_gen_undef (V64DFmode), exec)); + emit_insn (gen_frexpv64df_mant2 (target, arg)); return target; } case GCN_BUILTIN_OMP_DIM_SIZE: @@ -4239,10 +4552,11 @@ gcn_vectorize_get_mask_mode (machine_mode) Helper function for gcn_vectorize_vec_perm_const. */ static rtx -gcn_make_vec_perm_address (unsigned int *perm) +gcn_make_vec_perm_address (unsigned int *perm, int nelt) { - rtx x = gen_reg_rtx (V64SImode); - emit_move_insn (x, gcn_vec_constant (V64SImode, 0)); + machine_mode mode = VnMODE (nelt, SImode); + rtx x = gen_reg_rtx (mode); + emit_move_insn (x, gcn_vec_constant (mode, 0)); /* Permutation addresses use byte addressing. With each vector lane being 4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant, @@ -4258,15 +4572,13 @@ gcn_make_vec_perm_address (unsigned int *perm) { uint64_t exec_mask = 0; uint64_t lane_mask = 1; - for (int j = 0; j < 64; j++, lane_mask <<= 1) - if ((perm[j] * 4) & bit_mask) + for (int j = 0; j < nelt; j++, lane_mask <<= 1) + if (((perm[j] % nelt) * 4) & bit_mask) exec_mask |= lane_mask; if (exec_mask) - emit_insn (gen_addv64si3_exec (x, x, - gcn_vec_constant (V64SImode, - bit_mask), - x, get_exec (exec_mask))); + emit_insn (gen_addvNsi3 (x, x, gcn_vec_constant (mode, bit_mask), + x, get_exec (exec_mask))); } return x; @@ -4336,39 +4648,11 @@ gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, src1_lanes |= lane_bit; } - rtx addr = gcn_make_vec_perm_address (perm); - rtx (*ds_bpermute) (rtx, rtx, rtx, rtx); - - switch (vmode) - { - case E_V64QImode: - ds_bpermute = gen_ds_bpermutev64qi; - break; - case E_V64HImode: - ds_bpermute = gen_ds_bpermutev64hi; - break; - case E_V64SImode: - ds_bpermute = gen_ds_bpermutev64si; - break; - case E_V64HFmode: - ds_bpermute = gen_ds_bpermutev64hf; - break; - case E_V64SFmode: - ds_bpermute = gen_ds_bpermutev64sf; - break; - case E_V64DImode: - ds_bpermute = gen_ds_bpermutev64di; - break; - case E_V64DFmode: 
- ds_bpermute = gen_ds_bpermutev64df; - break; - default: - gcc_assert (false); - } + rtx addr = gcn_make_vec_perm_address (perm, nelt); /* Load elements from src0 to dst. */ - gcc_assert (~src1_lanes); - emit_insn (ds_bpermute (dst, addr, src0, gcn_full_exec_reg ())); + gcc_assert ((~src1_lanes) & (0xffffffffffffffffUL >> (64-nelt))); + emit_insn (gen_ds_bpermutevNm (dst, addr, src0, get_exec (vmode))); /* Load elements from src1 to dst. */ if (src1_lanes) @@ -4379,8 +4663,8 @@ gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, the two source vectors together. */ rtx tmp = gen_reg_rtx (vmode); - emit_insn (ds_bpermute (tmp, addr, src1, gcn_full_exec_reg ())); - emit_insn (gen_mov_with_exec (dst, tmp, get_exec (src1_lanes))); + emit_insn (gen_ds_bpermutevNm (tmp, addr, src1, get_exec (vmode))); + emit_insn (gen_movvNm (dst, tmp, dst, get_exec (src1_lanes))); return true; @@ -4396,7 +4680,22 @@ gcn_vector_mode_supported_p (machine_mode mode) { return (mode == V64QImode || mode == V64HImode || mode == V64SImode || mode == V64DImode - || mode == V64SFmode || mode == V64DFmode + || mode == V64SFmode || mode == V64DFmode + || mode == V32QImode || mode == V32HImode + || mode == V32SImode || mode == V32DImode + || mode == V32SFmode || mode == V32DFmode + || mode == V16QImode || mode == V16HImode + || mode == V16SImode || mode == V16DImode + || mode == V16SFmode || mode == V16DFmode + || mode == V8QImode || mode == V8HImode + || mode == V8SImode || mode == V8DImode + || mode == V8SFmode || mode == V8DFmode + || mode == V4QImode || mode == V4HImode + || mode == V4SImode || mode == V4DImode + || mode == V4SFmode || mode == V4DFmode + || mode == V2QImode || mode == V2HImode + || mode == V2SImode || mode == V2DImode + || mode == V2SFmode || mode == V2DFmode); } /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. @@ -4425,23 +4724,74 @@ gcn_vectorize_preferred_simd_mode (scalar_mode mode) } } +/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. 
+ + Try all the vector modes. */ + +unsigned int gcn_autovectorize_vector_modes (vector_modes *modes, + bool ARG_UNUSED (all)) +{ + modes->safe_push (V64QImode); + modes->safe_push (V64HImode); + modes->safe_push (V64SImode); + modes->safe_push (V64SFmode); + modes->safe_push (V64DImode); + modes->safe_push (V64DFmode); + + modes->safe_push (V32QImode); + modes->safe_push (V32HImode); + modes->safe_push (V32SImode); + modes->safe_push (V32SFmode); + modes->safe_push (V32DImode); + modes->safe_push (V32DFmode); + + modes->safe_push (V16QImode); + modes->safe_push (V16HImode); + modes->safe_push (V16SImode); + modes->safe_push (V16SFmode); + modes->safe_push (V16DImode); + modes->safe_push (V16DFmode); + + modes->safe_push (V8QImode); + modes->safe_push (V8HImode); + modes->safe_push (V8SImode); + modes->safe_push (V8SFmode); + modes->safe_push (V8DImode); + modes->safe_push (V8DFmode); + + modes->safe_push (V4QImode); + modes->safe_push (V4HImode); + modes->safe_push (V4SImode); + modes->safe_push (V4SFmode); + modes->safe_push (V4DImode); + modes->safe_push (V4DFmode); + + modes->safe_push (V2QImode); + modes->safe_push (V2HImode); + modes->safe_push (V2SImode); + modes->safe_push (V2SFmode); + modes->safe_push (V2DImode); + modes->safe_push (V2DFmode); + + /* We shouldn't need VECT_COMPARE_COSTS as they should all cost the same. */ + return 0; +} + /* Implement TARGET_VECTORIZE_RELATED_MODE. All GCN vectors are 64-lane, so this is simpler than other architectures. In particular, we do *not* want to match vector bit-size. 
*/ static opt_machine_mode -gcn_related_vector_mode (machine_mode ARG_UNUSED (vector_mode), +gcn_related_vector_mode (machine_mode vector_mode, scalar_mode element_mode, poly_uint64 nunits) { - if (known_ne (nunits, 0U) && known_ne (nunits, 64U)) - return VOIDmode; + int n = nunits.to_constant (); - machine_mode pref_mode = gcn_vectorize_preferred_simd_mode (element_mode); - if (!VECTOR_MODE_P (pref_mode)) - return VOIDmode; + if (n == 0) + n = GET_MODE_NUNITS (vector_mode); - return pref_mode; + return VnMODE (n, element_mode); } /* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. @@ -4566,6 +4916,8 @@ gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn, The vector register SRC of mode MODE is reduced using the operation given by UNSPEC, and the scalar result is returned in lane 63 of a vector register. */ +/* FIXME: Implement reductions for sizes other than V64. + (They're currently disabled in the machine description.) */ rtx gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec) @@ -4975,10 +5327,11 @@ gcn_md_reorg (void) { if (VECTOR_MODE_P (GET_MODE (x))) { - new_exec = -1; - break; + int vf = GET_MODE_NUNITS (GET_MODE (x)); + new_exec = MAX ((uint64_t)new_exec, + 0xffffffffffffffffUL >> (64-vf)); } - else + else if (new_exec == 0) new_exec = 1; } } @@ -5693,13 +6046,12 @@ static void print_reg (FILE *file, rtx x) { machine_mode mode = GET_MODE (x); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); if (mode == BImode || mode == QImode || mode == HImode || mode == SImode - || mode == HFmode || mode == SFmode - || mode == V64SFmode || mode == V64SImode - || mode == V64QImode || mode == V64HImode) + || mode == HFmode || mode == SFmode) fprintf (file, "%s", reg_names[REGNO (x)]); - else if (mode == DImode || mode == V64DImode - || mode == DFmode || mode == V64DFmode) + else if (mode == DImode || mode == DFmode) { if (SGPR_REGNO_P (REGNO (x))) fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG, @@ -6146,20 +6498,20 @@ 
print_operand (FILE *file, rtx x, int code) case 'o': { const char *s = 0; - switch (GET_MODE_SIZE (GET_MODE (x))) + machine_mode mode = GET_MODE (x); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + switch (mode) { - case 1: + case E_QImode: s = "_ubyte"; break; - case 2: + case E_HImode: + case E_HFmode: s = "_ushort"; break; - /* The following are full-vector variants. */ - case 64: - s = "_ubyte"; - break; - case 128: - s = "_ushort"; + default: break; } @@ -6174,43 +6526,31 @@ print_operand (FILE *file, rtx x, int code) } case 's': { - const char *s = ""; - switch (GET_MODE_SIZE (GET_MODE (x))) + const char *s; + machine_mode mode = GET_MODE (x); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + switch (mode) { - case 1: + case E_QImode: s = "_byte"; break; - case 2: + case E_HImode: + case E_HFmode: s = "_short"; break; - case 4: + case E_SImode: + case E_SFmode: s = "_dword"; break; - case 8: + case E_DImode: + case E_DFmode: s = "_dwordx2"; break; - case 12: - s = "_dwordx3"; - break; - case 16: + case E_TImode: s = "_dwordx4"; break; - case 32: - s = "_dwordx8"; - break; - case 64: - s = VECTOR_MODE_P (GET_MODE (x)) ? "_byte" : "_dwordx16"; - break; - /* The following are full-vector variants. 
*/ - case 128: - s = "_short"; - break; - case 256: - s = "_dword"; - break; - case 512: - s = "_dwordx2"; - break; default: output_operand_lossage ("invalid operand %%xn code"); return; @@ -6714,6 +7054,9 @@ gcn_dwarf_register_span (rtx rtl) #define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template #undef TARGET_ATTRIBUTE_TABLE #define TARGET_ATTRIBUTE_TABLE gcn_attribute_table +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ + gcn_autovectorize_vector_modes #undef TARGET_BUILTIN_DECL #define TARGET_BUILTIN_DECL gcn_builtin_decl #undef TARGET_CAN_CHANGE_MODE_CLASS diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h index 318256c..38f7212 100644 --- a/gcc/config/gcn/gcn.h +++ b/gcc/config/gcn/gcn.h @@ -678,3 +678,27 @@ enum gcn_builtin_codes /* Trampolines */ #define TRAMPOLINE_SIZE 36 #define TRAMPOLINE_ALIGNMENT 64 + +/* MD Optimization. + The following are intended to be obviously constant at compile time to + allow genconditions to eliminate bad patterns at compile time. */ +#define MODE_VF(M) \ + ((M == V64QImode || M == V64HImode || M == V64HFmode || M == V64SImode \ + || M == V64SFmode || M == V64DImode || M == V64DFmode) \ + ? 64 \ + : (M == V32QImode || M == V32HImode || M == V32HFmode || M == V32SImode \ + || M == V32SFmode || M == V32DImode || M == V32DFmode) \ + ? 32 \ + : (M == V16QImode || M == V16HImode || M == V16HFmode || M == V16SImode \ + || M == V16SFmode || M == V16DImode || M == V16DFmode) \ + ? 16 \ + : (M == V8QImode || M == V8HImode || M == V8HFmode || M == V8SImode \ + || M == V8SFmode || M == V8DImode || M == V8DFmode) \ + ? 8 \ + : (M == V4QImode || M == V4HImode || M == V4HFmode || M == V4SImode \ + || M == V4SFmode || M == V4DImode || M == V4DFmode) \ + ? 4 \ + : (M == V2QImode || M == V2HImode || M == V2HFmode || M == V2SImode \ + || M == V2SFmode || M == V2DImode || M == V2DFmode) \ + ? 
2 \ + : 1) diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc index 3c702fd..ef56704 100644 --- a/gcc/config/i386/driver-i386.cc +++ b/gcc/config/i386/driver-i386.cc @@ -589,15 +589,12 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* This is unknown family 0x6 CPU. */ if (has_feature (FEATURE_AVX)) { + /* Assume Tiger Lake */ if (has_feature (FEATURE_AVX512VP2INTERSECT)) - { - if (has_feature (FEATURE_TSXLDTRK)) - /* Assume Sapphire Rapids. */ - cpu = "sapphirerapids"; - else - /* Assume Tiger Lake */ - cpu = "tigerlake"; - } + cpu = "tigerlake"; + /* Assume Sapphire Rapids. */ + else if (has_feature (FEATURE_TSXLDTRK)) + cpu = "sapphirerapids"; /* Assume Cooper Lake */ else if (has_feature (FEATURE_AVX512BF16)) cpu = "cooperlake"; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 900a3bc..372a2cf 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2326,10 +2326,9 @@ constexpr wide_int_bitmask PTA_ICELAKE_SERVER = PTA_ICELAKE_CLIENT constexpr wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT | PTA_KL | PTA_WIDEKL; constexpr wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_ICELAKE_SERVER | PTA_MOVDIRI - | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE - | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE - | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI | PTA_AVX512FP16 - | PTA_AVX512BF16; + | PTA_MOVDIR64B | PTA_ENQCMD | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG + | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 + | PTA_UINTR | PTA_AVXVNNI | PTA_AVX512FP16 | PTA_AVX512BF16; constexpr wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1; constexpr wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 1be9b66..8e84752 
100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -10826,6 +10826,39 @@ (set_attr "type" "alu, alu, msklog") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "*notxor<mode>_1" + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k") + (not:SWI248 + (xor:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k") + (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (XOR, <MODE>mode, operands)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (xor:SWI248 (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) + (not:SWI248 (match_dup 0)))] +{ + if (MASK_REG_P (operands[0])) + { + emit_insn (gen_kxnor<mode> (operands[0], operands[1], operands[2])); + DONE; + } +} + [(set (attr "isa") + (cond [(eq_attr "alternative" "2") + (if_then_else (eq_attr "mode" "SI,DI") + (const_string "avx512bw") + (const_string "avx512f")) + ] + (const_string "*"))) + (set_attr "type" "alu, alu, msklog") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "*iordi_1_bts" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ior:DI @@ -10959,6 +10992,44 @@ (symbol_ref "!TARGET_PARTIAL_REG_STALL")] (symbol_ref "true")))]) +(define_insn_and_split "*notxorqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k") + (not:QI + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k") + (match_operand:QI 2 "general_operand" "qn,m,rn,k")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (XOR, QImode, operands)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (xor:QI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) + (not:QI (match_dup 0)))] +{ + if (mask_reg_operand (operands[0], QImode)) + { + emit_insn (gen_kxnorqi (operands[0], operands[1], operands[2])); + DONE; + } +} + [(set_attr "isa" "*,*,*,avx512f") + (set_attr "type" "alu,alu,alu,msklog") + (set (attr 
"mode") + (cond [(eq_attr "alternative" "2") + (const_string "SI") + (and (eq_attr "alternative" "3") + (match_test "!TARGET_AVX512DQ")) + (const_string "HI") + ] + (const_string "QI"))) + ;; Potential partial reg stall on alternative 2. + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "2") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true")))]) + ;; Alternative 1 is needed to work around LRA limitation, see PR82524. (define_insn_and_split "*<code><mode>_1_slp" [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>")) @@ -20145,8 +20216,8 @@ (set_attr "mode" "<MODE>")]) (define_expand "lrint<MODEF:mode><SWI48:mode>2" - [(set (match_operand:SWI48 0 "nonimmediate_operand") - (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")] + [(set (match_operand:SWI48 0 "register_operand") + (unspec:SWI48 [(match_operand:MODEF 1 "nonimmediate_operand")] UNSPEC_FIX_NOTRUNC))] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH") diff --git a/gcc/config/mips/driver-native.cc b/gcc/config/mips/driver-native.cc index 47627f8..327ad25 100644 --- a/gcc/config/mips/driver-native.cc +++ b/gcc/config/mips/driver-native.cc @@ -23,6 +23,9 @@ along with GCC; see the file COPYING3. If not see #include "system.h" #include "coretypes.h" #include "tm.h" +#ifdef HAVE_SYS_AUXV_H +#include <sys/auxv.h> +#endif /* This will be called by the spec parser in gcc.cc when it sees a %:local_cpu_detect(args) construct. 
Currently it will be called @@ -41,6 +44,7 @@ const char * host_detect_local_cpu (int argc, const char **argv) { const char *cpu = NULL; + char *ret = NULL; char buf[128]; FILE *f; bool arch; @@ -54,7 +58,7 @@ host_detect_local_cpu (int argc, const char **argv) f = fopen ("/proc/cpuinfo", "r"); if (f == NULL) - return NULL; + goto fallback_cpu; while (fgets (buf, sizeof (buf), f) != NULL) if (startswith (buf, "cpu model")) @@ -84,8 +88,23 @@ host_detect_local_cpu (int argc, const char **argv) fclose (f); +fallback_cpu: +#if defined (__mips_nan2008) + ret = reconcat (ret, " -mnan=2008 ", NULL); +#endif + +#ifdef HAVE_GETAUXVAL if (cpu == NULL) - return NULL; + cpu = (const char *) getauxval (AT_BASE_PLATFORM); +#endif + +#if defined (_MIPS_ARCH) + if (cpu == NULL) + cpu = _MIPS_ARCH; +#endif + + if (cpu) + ret = reconcat (ret, ret, "-m", argv[0], "=", cpu, NULL); - return concat ("-m", argv[0], "=", cpu, NULL); + return ret; } diff --git a/gcc/config/pru/pru-protos.h b/gcc/config/pru/pru-protos.h index 4b190c9..517fa02 100644 --- a/gcc/config/pru/pru-protos.h +++ b/gcc/config/pru/pru-protos.h @@ -52,6 +52,7 @@ extern const char *pru_output_signed_cbranch (rtx *, bool); extern const char *pru_output_signed_cbranch_ubyteop2 (rtx *, bool); extern const char *pru_output_signed_cbranch_zeroop2 (rtx *, bool); +extern enum rtx_code pru_noteq_condition (enum rtx_code code); extern rtx pru_expand_fp_compare (rtx comparison, machine_mode mode); extern void pru_emit_doloop (rtx *, int); diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc index 04eca90..0029dcb 100644 --- a/gcc/config/pru/pru.cc +++ b/gcc/config/pru/pru.cc @@ -895,6 +895,27 @@ pru_init_libfuncs (void) set_optab_libfunc (udivmod_optab, DImode, "__pruabi_divremull"); } +/* Given a comparison CODE, return a similar comparison but without + the "equals" condition. In other words, it strips GE/GEU/LE/LEU + and instead returns GT/GTU/LT/LTU. 
*/ + +enum rtx_code +pru_noteq_condition (enum rtx_code code) +{ + switch (code) + { + case GT: return GT; + case GTU: return GTU; + case GE: return GT; + case GEU: return GTU; + case LT: return LT; + case LTU: return LTU; + case LE: return LT; + case LEU: return LTU; + default: + gcc_unreachable (); + } +} /* Emit comparison instruction if necessary, returning the expression that holds the compare result in the proper mode. Return the comparison diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md index 144cd35..bdc5ad7 100644 --- a/gcc/config/pru/pru.md +++ b/gcc/config/pru/pru.md @@ -703,6 +703,202 @@ [(set_attr "type" "alu") (set_attr "length" "12")]) + +; 64-bit LSHIFTRT with a constant shift count can be expanded into +; more efficient code sequence than a variable register shift. +; +; 1. For shift >= 32: +; dst_lo = (src_hi >> (shift - 32)) +; dst_hi = 0 +; +; 2. For shift==1 there is no need for a temporary: +; dst_lo = (src_lo >> 1) +; if (src_hi & 1) +; dst_lo |= (1 << 31) +; dst_hi = (src_hi >> 1) +; +; 3. For shift < 32: +; dst_lo = (src_lo >> shift) +; tmp = (src_hi << (32 - shift) +; dst_lo |= tmp +; dst_hi = (src_hi >> shift) +; +; 4. For shift in a register: +; Fall back to calling libgcc. 
+(define_expand "lshrdi3" + [(set (match_operand:DI 0 "register_operand") + (lshiftrt:DI + (match_operand:DI 1 "register_operand") + (match_operand:QI 2 "const_int_operand")))] + "" +{ + gcc_assert (CONST_INT_P (operands[2])); + + const int nshifts = INTVAL (operands[2]); + rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0); + rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4); + rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0); + rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4); + + if (nshifts >= 32) + { + emit_insn (gen_rtx_SET (dst_lo, + gen_rtx_LSHIFTRT (SImode, + src_hi, + GEN_INT (nshifts - 32)))); + emit_insn (gen_rtx_SET (dst_hi, const0_rtx)); + DONE; + } + + gcc_assert (can_create_pseudo_p ()); + + /* The expansions which follow are safe only if DST_LO and SRC_HI + do not overlap. If they do, then fix by using a temporary register. + Overlapping of DST_HI and SRC_LO is safe because by the time DST_HI + is set, SRC_LO is no longer live. */ + if (reg_overlap_mentioned_p (dst_lo, src_hi)) + { + rtx new_src_hi = gen_reg_rtx (SImode); + + emit_move_insn (new_src_hi, src_hi); + src_hi = new_src_hi; + } + + if (nshifts == 1) + { + rtx_code_label *skip_hiset_label; + rtx j; + + emit_insn (gen_rtx_SET (dst_lo, + gen_rtx_LSHIFTRT (SImode, src_lo, const1_rtx))); + + /* The code generated by `genemit' would create a LABEL_REF. 
*/ + skip_hiset_label = gen_label_rtx (); + j = emit_jump_insn (gen_cbranch_qbbx_const (EQ, + SImode, + src_hi, + GEN_INT (0), + skip_hiset_label)); + JUMP_LABEL (j) = skip_hiset_label; + LABEL_NUSES (skip_hiset_label)++; + + emit_insn (gen_iorsi3 (dst_lo, dst_lo, GEN_INT (1 << 31))); + emit_label (skip_hiset_label); + emit_insn (gen_rtx_SET (dst_hi, + gen_rtx_LSHIFTRT (SImode, src_hi, const1_rtx))); + DONE; + } + + if (nshifts < 32) + { + rtx tmpval = gen_reg_rtx (SImode); + + emit_insn (gen_rtx_SET (dst_lo, + gen_rtx_LSHIFTRT (SImode, + src_lo, + GEN_INT (nshifts)))); + emit_insn (gen_rtx_SET (tmpval, + gen_rtx_ASHIFT (SImode, + src_hi, + GEN_INT (32 - nshifts)))); + emit_insn (gen_iorsi3 (dst_lo, dst_lo, tmpval)); + emit_insn (gen_rtx_SET (dst_hi, + gen_rtx_LSHIFTRT (SImode, + src_hi, + GEN_INT (nshifts)))); + DONE; + } + gcc_unreachable (); +}) + +; 64-bit ASHIFT with a constant shift count can be expanded into +; more efficient code sequence than the libgcc call required by +; a variable shift in a register. + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "register_operand") + (ashift:DI + (match_operand:DI 1 "register_operand") + (match_operand:QI 2 "const_int_operand")))] + "" +{ + gcc_assert (CONST_INT_P (operands[2])); + + const int nshifts = INTVAL (operands[2]); + rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0); + rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4); + rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0); + rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4); + + if (nshifts >= 32) + { + emit_insn (gen_rtx_SET (dst_hi, + gen_rtx_ASHIFT (SImode, + src_lo, + GEN_INT (nshifts - 32)))); + emit_insn (gen_rtx_SET (dst_lo, const0_rtx)); + DONE; + } + + gcc_assert (can_create_pseudo_p ()); + + /* The expansions which follow are safe only if DST_HI and SRC_LO + do not overlap. If they do, then fix by using a temporary register. 
+ Overlapping of DST_LO and SRC_HI is safe because by the time DST_LO + is set, SRC_HI is no longer live. */ + if (reg_overlap_mentioned_p (dst_hi, src_lo)) + { + rtx new_src_lo = gen_reg_rtx (SImode); + + emit_move_insn (new_src_lo, src_lo); + src_lo = new_src_lo; + } + + if (nshifts == 1) + { + rtx_code_label *skip_hiset_label; + rtx j; + + emit_insn (gen_rtx_SET (dst_hi, + gen_rtx_ASHIFT (SImode, src_hi, const1_rtx))); + + skip_hiset_label = gen_label_rtx (); + j = emit_jump_insn (gen_cbranch_qbbx_const (EQ, + SImode, + src_lo, + GEN_INT (31), + skip_hiset_label)); + JUMP_LABEL (j) = skip_hiset_label; + LABEL_NUSES (skip_hiset_label)++; + + emit_insn (gen_iorsi3 (dst_hi, dst_hi, GEN_INT (1 << 0))); + emit_label (skip_hiset_label); + emit_insn (gen_rtx_SET (dst_lo, + gen_rtx_ASHIFT (SImode, src_lo, const1_rtx))); + DONE; + } + + if (nshifts < 32) + { + rtx tmpval = gen_reg_rtx (SImode); + + emit_insn (gen_rtx_SET (dst_hi, + gen_rtx_ASHIFT (SImode, + src_hi, + GEN_INT (nshifts)))); + emit_insn (gen_rtx_SET (tmpval, + gen_rtx_LSHIFTRT (SImode, + src_lo, + GEN_INT (32 - nshifts)))); + emit_insn (gen_iorsi3 (dst_hi, dst_hi, tmpval)); + emit_insn (gen_rtx_SET (dst_lo, + gen_rtx_ASHIFT (SImode, + src_lo, + GEN_INT (nshifts)))); + DONE; + } + gcc_unreachable (); +}) ;; Include ALU patterns with zero-extension of operands. That's where ;; the real insns are defined. @@ -1113,6 +1309,186 @@ operands[2] = XEXP (t, 1); }) +;; Expand the cbranchdi pattern in order to avoid the default +;; expansion into word_mode operations, which is not efficient for PRU. +;; In pseudocode this expansion outputs: +;; +;; /* EQ */ +;; if (OP1_hi {reverse_condition (cmp)} OP2_hi) +;; goto fallthrough +;; if (OP1_lo {cmp} OP2_lo) +;; goto label3 +;; fallthrough: +;; +;; /* NE */ +;; if (OP1_hi {cmp} OP2_hi) +;; goto label3 +;; if (OP1_lo {cmp} OP2_lo) +;; goto label3 +;; +;; The LT comparisons with zero take one machine instruction to simply +;; check the sign bit. 
The GT comparisons with zero take two - one +;; to check the sign bit, and one to check for zero. Hence arrange +;; the expand such that only LT comparison is used for OP1_HI, because +;; OP2_HI is const0_rtx. +;; +;; The LTU comparisons with zero will be removed by subsequent passes. +;; +;; /* LT/LTU/LE/LEU */ +;; if (OP1_hi {noteq_condition (cmp)} OP2_hi) +;; goto label3 /* DI comparison obviously true. */ +;; if (OP1_hi != OP2_hi) +;; goto fallthrough /* DI comparison obviously not true. */ +;; if (OP1_lo {unsigned_condition (cmp)} OP2_lo) +;; goto label3 /* Comparison was deferred to lo parts. */ +;; fallthrough: + +;; /* GT/GTU/GE/GEU */ +;; if (OP1_hi {reverse_condition (noteq_condition (cmp))} OP2_hi) +;; goto fallthrough /* DI comparison obviously not true. */ +;; if (OP1_hi != OP2_hi) +;; goto label3 /* DI comparison obviously true. */ +;; if (OP1_lo {unsigned_condition (cmp)} OP2_lo) +;; goto label3 /* Comparison was deferred to lo parts. */ +;; fallthrough: + +(define_expand "cbranchdi4" + [(set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" + [(match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_ubyte_operand")]) + (label_ref (match_operand 3 "")) + (pc)))] + "" +{ + const enum rtx_code code = GET_CODE (operands[0]); + rtx label3 = operands[3]; + rtx op1_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0); + rtx op1_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4); + rtx op2_lo = simplify_gen_subreg (SImode, operands[2], DImode, 0); + rtx op2_hi = simplify_gen_subreg (SImode, operands[2], DImode, 4); + rtx j; + + if (code == EQ) + { + rtx label_fallthrough = gen_label_rtx (); + rtx label_fallthrough_ref = gen_rtx_LABEL_REF (Pmode, label_fallthrough); + + rtx cond_hi = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi); + rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi, + label_fallthrough_ref, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi)); + JUMP_LABEL (j) = label_fallthrough; + 
LABEL_NUSES (label_fallthrough)++; + + rtx label3_ref = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_lo = gen_rtx_fmt_ee (EQ, VOIDmode, op1_lo, op2_lo); + rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo, + label3_ref, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + emit_label (label_fallthrough); + DONE; + } + if (code == NE) + { + rtx label3_ref1 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_hi = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi); + rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi, + label3_ref1, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + rtx label3_ref2 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_lo = gen_rtx_fmt_ee (NE, VOIDmode, op1_lo, op2_lo); + rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo, + label3_ref2, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + DONE; + } + + if (code == LT || code == LTU || code == LE || code == LEU) + { + /* Check for "DI comparison obviously true". */ + rtx label3_ref1 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_hi = gen_rtx_fmt_ee (pru_noteq_condition (code), + VOIDmode, op1_hi, op2_hi); + rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi, + label3_ref1, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + /* Check for "DI comparison obviously not true". 
*/ + rtx label_fallthrough = gen_label_rtx (); + rtx label_fallthrough_ref = gen_rtx_LABEL_REF (Pmode, label_fallthrough); + rtx cond_hine = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi); + rtx check_hine = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hine, + label_fallthrough_ref, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hine)); + JUMP_LABEL (j) = label_fallthrough; + LABEL_NUSES (label_fallthrough)++; + + /* Comparison deferred to the lo parts. */ + rtx label3_ref2 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_lo = gen_rtx_fmt_ee (unsigned_condition (code), + VOIDmode, op1_lo, op2_lo); + rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo, + label3_ref2, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + emit_label (label_fallthrough); + DONE; + } + + if (code == GT || code == GTU || code == GE || code == GEU) + { + /* Check for "DI comparison obviously not true". */ + const enum rtx_code reversed_code = reverse_condition (code); + rtx label_fallthrough = gen_label_rtx (); + rtx label_fallthrough_ref = gen_rtx_LABEL_REF (Pmode, label_fallthrough); + rtx cond_hi = gen_rtx_fmt_ee (pru_noteq_condition (reversed_code), + VOIDmode, op1_hi, op2_hi); + rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi, + label_fallthrough_ref, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi)); + JUMP_LABEL (j) = label_fallthrough; + LABEL_NUSES (label_fallthrough)++; + + /* Check for "DI comparison obviously true". */ + rtx label3_ref1 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_hine = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi); + rtx check_hine = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hine, + label3_ref1, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hine)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + /* Comparison deferred to the lo parts. 
*/ + rtx label3_ref2 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_lo = gen_rtx_fmt_ee (unsigned_condition (code), + VOIDmode, op1_lo, op2_lo); + rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo, + label3_ref2, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + emit_label (label_fallthrough); + DONE; + } + gcc_unreachable (); +}) + ; ; Bit test branch diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc index cac0043..78f6eac 100644 --- a/gcc/config/riscv/riscv-c.cc +++ b/gcc/config/riscv/riscv-c.cc @@ -190,4 +190,4 @@ void riscv_register_pragmas (void) { c_register_pragma ("riscv", "intrinsic", riscv_pragma_intrinsic); -}
\ No newline at end of file +} diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index 0096e32..99c4825 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -50,25 +50,56 @@ using namespace riscv_vector; namespace riscv_vector { +/* Static information about each vector type. */ +struct vector_type_info +{ + /* The name of the type as declared by riscv_vector.h + which is recommend to use. For example: 'vint32m1_t'. */ + const char *name; + + /* ABI name of vector type. The type is always available + under this name, even when riscv_vector.h isn't included. + For example: '__rvv_int32m1_t'. */ + const char *abi_name; + + /* The C++ mangling of ABI_NAME. */ + const char *mangled_name; +}; + /* Information about each RVV type. */ static CONSTEXPR const vector_type_info vector_types[] = { -#define DEF_RVV_TYPE(USER_NAME, NCHARS, ABI_NAME, ARGS...) \ - {#USER_NAME, #ABI_NAME, "u" #NCHARS #ABI_NAME}, +#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, ARGS...) \ + {#NAME, #ABI_NAME, "u" #NCHARS #ABI_NAME}, #include "riscv-vector-builtins.def" }; -/* The scalar type associated with each vector type. */ -static GTY (()) tree scalar_types[NUM_VECTOR_TYPES]; -/* The machine mode associated with each vector type. */ -static GTY (()) machine_mode vector_modes[NUM_VECTOR_TYPES]; /* The RVV types, with their built-in "__rvv..._t" name. Allow an index of NUM_VECTOR_TYPES, which always yields a null tree. */ -static GTY(()) tree abi_vector_types[NUM_VECTOR_TYPES + 1]; +static GTY (()) tree abi_vector_types[NUM_VECTOR_TYPES + 1]; /* Same, but with the riscv_vector.h "v..._t" name. 
*/ -extern GTY(()) tree builtin_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; -tree builtin_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; +extern GTY (()) rvv_builtin_types_t builtin_types[NUM_VECTOR_TYPES + 1]; +rvv_builtin_types_t builtin_types[NUM_VECTOR_TYPES + 1]; + +/* RAII class for enabling enough RVV features to define the built-in + types and implement the riscv_vector.h pragma. + + Note: According to 'TYPE_MODE' macro implementation, we need set + have_regs_of_mode[mode] to be true if we want to get the exact mode + from 'TYPE_MODE'. However, have_regs_of_mode has not been set yet in + targetm.init_builtins (). We need rvv_switcher to set have_regs_of_mode + before targetm.init_builtins () and recover back have_regs_of_mode + after targetm.init_builtins (). */ +class rvv_switcher +{ +public: + rvv_switcher (); + ~rvv_switcher (); + +private: + bool m_old_have_regs_of_mode[MAX_MACHINE_MODE]; +}; rvv_switcher::rvv_switcher () { @@ -93,8 +124,8 @@ add_vector_type_attribute (tree type, const char *mangled_name) { tree mangled_name_tree = get_identifier (mangled_name); tree value = tree_cons (NULL_TREE, mangled_name_tree, NULL_TREE); - TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("RVV type"), value, - TYPE_ATTRIBUTES (type)); + TYPE_ATTRIBUTES (type) + = tree_cons (get_identifier ("RVV type"), value, TYPE_ATTRIBUTES (type)); } /* Force TYPE to be a sizeless type. */ @@ -137,6 +168,39 @@ mangle_builtin_type (const_tree type) return NULL; } +/* Return a representation of "const T *". */ +static tree +build_const_pointer (tree t) +{ + return build_pointer_type (build_qualified_type (t, TYPE_QUAL_CONST)); +} + +/* Helper function for register a single built-in RVV ABI type. 
*/ +static void +register_builtin_type (vector_type_index type, tree eltype, machine_mode mode) +{ + builtin_types[type].scalar = eltype; + builtin_types[type].scalar_ptr = build_pointer_type (eltype); + builtin_types[type].scalar_const_ptr = build_const_pointer (eltype); + if (!riscv_v_ext_enabled_vector_mode_p (mode)) + return; + + tree vectype = build_vector_type_for_mode (eltype, mode); + gcc_assert (VECTOR_MODE_P (TYPE_MODE (vectype)) && TYPE_MODE (vectype) == mode + && TYPE_MODE_RAW (vectype) == mode && TYPE_ALIGN (vectype) <= 128 + && known_eq (tree_to_poly_uint64 (TYPE_SIZE (vectype)), + GET_MODE_BITSIZE (mode))); + vectype = build_distinct_type_copy (vectype); + gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype)); + SET_TYPE_STRUCTURAL_EQUALITY (vectype); + TYPE_ARTIFICIAL (vectype) = 1; + TYPE_INDIVISIBLE_P (vectype) = 1; + add_vector_type_attribute (vectype, vector_types[type].mangled_name); + make_type_sizeless (vectype); + abi_vector_types[type] = vectype; + lang_hooks.types.register_builtin_type (vectype, vector_types[type].abi_name); +} + /* Register the built-in RVV ABI types, such as __rvv_int32m1_t. */ static void register_builtin_types () @@ -151,42 +215,12 @@ register_builtin_types () = TARGET_64BIT ? unsigned_intSI_type_node : long_unsigned_type_node; machine_mode mode; -#define DEF_RVV_TYPE(USER_NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ - VECTOR_MODE_MIN_VLEN_32) \ +#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ + VECTOR_MODE_MIN_VLEN_32, ARGS...) \ mode = TARGET_MIN_VLEN > 32 ? VECTOR_MODE##mode \ : VECTOR_MODE_MIN_VLEN_32##mode; \ - scalar_types[VECTOR_TYPE_##USER_NAME] \ - = riscv_v_ext_enabled_vector_mode_p (mode) ? SCALAR_TYPE##_type_node \ - : NULL_TREE; \ - vector_modes[VECTOR_TYPE_##USER_NAME] \ - = riscv_v_ext_enabled_vector_mode_p (mode) ? 
mode : VOIDmode; + register_builtin_type (VECTOR_TYPE_##NAME, SCALAR_TYPE##_type_node, mode); #include "riscv-vector-builtins.def" - - for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i) - { - tree eltype = scalar_types[i]; - mode = vector_modes[i]; - /* We disabled the datatypes according '-march'. */ - if (!eltype) - continue; - - tree vectype = build_vector_type_for_mode (eltype, mode); - gcc_assert ( - VECTOR_MODE_P (TYPE_MODE (vectype)) && TYPE_MODE (vectype) == mode - && TYPE_MODE_RAW (vectype) == mode && TYPE_ALIGN (vectype) <= 128 - && known_eq (tree_to_poly_uint64 (TYPE_SIZE (vectype)), - GET_MODE_BITSIZE (mode))); - vectype = build_distinct_type_copy (vectype); - gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype)); - SET_TYPE_STRUCTURAL_EQUALITY (vectype); - TYPE_ARTIFICIAL (vectype) = 1; - TYPE_INDIVISIBLE_P (vectype) = 1; - add_vector_type_attribute (vectype, vector_types[i].mangled_name); - make_type_sizeless (vectype); - abi_vector_types[i] = vectype; - lang_hooks.types.register_builtin_type (vectype, - vector_types[i].abi_name); - } } /* Register vector type TYPE under its risv_vector.h name. */ @@ -198,7 +232,7 @@ register_vector_type (vector_type_index type) is disabled according to '-march'. 
*/ if (!vectype) return; - tree id = get_identifier (vector_types[type].user_name); + tree id = get_identifier (vector_types[type].name); tree decl = build_decl (input_location, TYPE_DECL, id, vectype); decl = lang_hooks.decls.pushdecl (decl); @@ -212,7 +246,8 @@ register_vector_type (vector_type_index type) && TYPE_MAIN_VARIANT (TREE_TYPE (decl)) == vectype) vectype = TREE_TYPE (decl); - builtin_vector_types[0][type] = vectype; + builtin_types[type].vector = vectype; + builtin_types[type].vector_ptr = build_pointer_type (vectype); } /* Initialize all compiler built-ins related to RVV that should be diff --git a/gcc/config/riscv/riscv-vector-builtins.def b/gcc/config/riscv/riscv-vector-builtins.def index a9001b3..83603fe 100644 --- a/gcc/config/riscv/riscv-vector-builtins.def +++ b/gcc/config/riscv/riscv-vector-builtins.def @@ -32,7 +32,7 @@ along with GCC; see the file COPYING3. If not see TARGET_MIN_VLEN > 32. Otherwise the machine mode is VNx1SImode. */ #ifndef DEF_RVV_TYPE -#define DEF_RVV_TYPE(USER_NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ +#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ VECTOR_MODE_MIN_VLEN_32) #endif diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h index 6ca0b07..ea67da9 100644 --- a/gcc/config/riscv/riscv-vector-builtins.h +++ b/gcc/config/riscv/riscv-vector-builtins.h @@ -23,52 +23,23 @@ namespace riscv_vector { -/* This is for segment instructions. */ -const unsigned int MAX_TUPLE_SIZE = 8; - -/* Static information about each vector type. */ -struct vector_type_info -{ - /* The name of the type as declared by riscv_vector.h - which is recommend to use. For example: 'vint32m1_t'. */ - const char *user_name; - - /* ABI name of vector type. The type is always available - under this name, even when riscv_vector.h isn't included. - For example: '__rvv_int32m1_t'. */ - const char *abi_name; - - /* The C++ mangling of ABI_NAME. 
*/ - const char *mangled_name; -}; - /* Enumerates the RVV types, together called "vector types" for brevity. */ enum vector_type_index { -#define DEF_RVV_TYPE(USER_NAME, ABI_NAME, NCHARS, ARGS...) \ - VECTOR_TYPE_##USER_NAME, +#define DEF_RVV_TYPE(NAME, ABI_NAME, NCHARS, ARGS...) VECTOR_TYPE_##NAME, #include "riscv-vector-builtins.def" NUM_VECTOR_TYPES }; -/* RAII class for enabling enough RVV features to define the built-in - types and implement the riscv_vector.h pragma. - - Note: According to 'TYPE_MODE' macro implementation, we need set - have_regs_of_mode[mode] to be true if we want to get the exact mode - from 'TYPE_MODE'. However, have_regs_of_mode has not been set yet in - targetm.init_builtins (). We need rvv_switcher to set have_regs_of_mode - before targetm.init_builtins () and recover back have_regs_of_mode - after targetm.init_builtins (). */ -class rvv_switcher +/* Builtin types that are used to register RVV intrinsics. */ +struct GTY (()) rvv_builtin_types_t { -public: - rvv_switcher (); - ~rvv_switcher (); - -private: - bool m_old_have_regs_of_mode[MAX_MACHINE_MODE]; + tree vector; + tree scalar; + tree vector_ptr; + tree scalar_ptr; + tree scalar_const_ptr; }; } // end namespace riscv_vector diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 014206f..2d1cda2 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -229,6 +229,7 @@ ;; Classification of RVV instructions which will be added to each RVV .md pattern and used by scheduler. ;; rdvlenb vector byte length vlenb csrr read ;; rdvl vector length vl csrr read +;; vsetvl vector configuration-setting instrucions ;; 7. 
Vector Loads and Stores ;; vlde vector unit-stride load instructions ;; vste vector unit-stride store instructions @@ -316,7 +317,7 @@ "unknown,branch,jump,call,load,fpload,store,fpstore, mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul, fmadd,fdiv,fcmp,fcvt,fsqrt,multi,auipc,sfb_alu,nop,ghost,bitmanip,rotate, - rdvlenb,rdvl,vlde,vste,vldm,vstm,vlds,vsts, + rdvlenb,rdvl,vsetvl,vlde,vste,vldm,vstm,vlds,vsts, vldux,vldox,vstux,vstox,vldff,vldr,vstr, vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp, vimul,vidiv,viwmul,vimuladd,viwmuladd,vimerge,vimov, diff --git a/gcc/config/vxworks.h b/gcc/config/vxworks.h index 075a451..e7e5ffe 100644 --- a/gcc/config/vxworks.h +++ b/gcc/config/vxworks.h @@ -224,14 +224,54 @@ extern void vxworks_driver_init (unsigned int *, struct cl_decoded_option **); #undef VXWORKS_LINK_SPEC #define VXWORKS_LINK_SPEC VXWORKS_BASE_LINK_SPEC " " VXWORKS_EXTRA_LINK_SPEC +/* Control how to include libgcc in the link closure, handling both "shared" + and "non-static" in addition to "static-libgcc" when shared lib support is + enabled. */ + #undef VXWORKS_LIBGCC_SPEC + +/* libgcc_eh control; libgcc_eh.a is available either together with libgcc_s + (mrtp and mcmodel!=large when configured with --enable-shared) or when the + compiler is specially setup to support dual sjlj/table-based eh. */ + +/* VX_LGCC_EH_SO1: The "-lgcc_eh" part we need in situations where we know a + shared libgcc is available (ENABLE_SHARED_LIBGCC + mrtp multilib). */ + +#define VX_LGCC_EH_SO1 " -lgcc_eh -lgcc" +/* Extra -lgcc to handle functions from libgcc_eh that refer to symbols + exposed by libgcc and not guaranteed to be dragged in before -lgcc_eh + appears. */ + +/* VX_LGCC_EH_SO0: The "-lgcc_eh" part we need in situations where we know a + shared libgcc is not available (!ENABLE_SHARED_LIBGCC or !mrtp multlib). */ + +#if !defined(CONFIG_DUAL_EXCEPTIONS) + +/* No shared lib && !DUAL_EH -> no libgcc_eh available at all. 
*/ +#define VX_LGCC_EH_SO0 + +#else /* CONFIG_DUAL_EXCEPTIONS */ + +/* No shared lib but DUAL_EH -> libgcc_eh around and spec handled by the driver + depending on ENABLE_SHARED_LIBGCC. If defined, the driver expects a regular + sequence. Otherwise, the driver is expected to turn -lgcc into -lgcc_eh on + its own and just add an instance to address possible cross refs. */ + +#if defined(ENABLE_SHARED_LIBGCC) +#define VX_LGCC_EH_SO0 " -lgcc_eh -lgcc" +#else +#define VX_LGCC_EH_SO0 " -lgcc" +#endif + +#endif /* CONFIG_DUAL_EXCEPTIONS */ + #if defined(ENABLE_SHARED_LIBGCC) #define VXWORKS_LIBGCC_SPEC \ -"%{!mrtp:-lgcc -lgcc_eh} \ - %{mrtp:%{!static-libgcc:%{shared|non-static:-lgcc_s;:-lgcc -lgcc_eh}} \ - %{static-libgcc:-lgcc -lgcc_eh}}" + "%{!mrtp|mcmodel=large:-lgcc" VX_LGCC_EH_SO0 ";" \ + " :%{!static-libgcc:%{shared|non-static:-lgcc_s;:-lgcc" VX_LGCC_EH_SO1 "}} \ + %{static-libgcc:-lgcc" VX_LGCC_EH_SO1 "}}" #else -#define VXWORKS_LIBGCC_SPEC "-lgcc" +#define VXWORKS_LIBGCC_SPEC "-lgcc" VX_LGCC_EH_SO0 #endif /* Setup the crtstuff begin/end we might need for dwarf EH registration |