aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorMartin Liska <mliska@suse.cz>2022-10-13 15:54:17 +0200
committerMartin Liska <mliska@suse.cz>2022-10-13 15:54:17 +0200
commitbd21c04269deded2c7476ceca1100a26f28ea526 (patch)
tree197bf75eedac69362078a4ccc0afe5615c45c327 /gcc/config
parentd9e7934d25da4a78ffef1f738206aa1d897911df (diff)
parent786e4c024f941671a233f5779d73a5d22f4e9588 (diff)
downloadgcc-bd21c04269deded2c7476ceca1100a26f28ea526.zip
gcc-bd21c04269deded2c7476ceca1100a26f28ea526.tar.gz
gcc-bd21c04269deded2c7476ceca1100a26f28ea526.tar.bz2
Merge branch 'master' into devel/sphinx
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/arc/arc.cc54
-rw-r--r--gcc/config/arc/arc.md372
-rw-r--r--gcc/config/arc/arc.opt10
-rw-r--r--gcc/config/arc/constraints.md44
-rw-r--r--gcc/config/gcn/gcn-modes.def82
-rw-r--r--gcc/config/gcn/gcn-protos.h24
-rw-r--r--gcc/config/gcn/gcn-valu.md399
-rw-r--r--gcc/config/gcn/gcn.cc1063
-rw-r--r--gcc/config/gcn/gcn.h24
-rw-r--r--gcc/config/i386/driver-i386.cc13
-rw-r--r--gcc/config/i386/i386.h7
-rw-r--r--gcc/config/i386/i386.md75
-rw-r--r--gcc/config/mips/driver-native.cc25
-rw-r--r--gcc/config/pru/pru-protos.h1
-rw-r--r--gcc/config/pru/pru.cc21
-rw-r--r--gcc/config/pru/pru.md376
-rw-r--r--gcc/config/riscv/riscv-c.cc2
-rw-r--r--gcc/config/riscv/riscv-vector-builtins.cc127
-rw-r--r--gcc/config/riscv/riscv-vector-builtins.def2
-rw-r--r--gcc/config/riscv/riscv-vector-builtins.h45
-rw-r--r--gcc/config/riscv/riscv.md3
-rw-r--r--gcc/config/vxworks.h48
22 files changed, 2013 insertions, 804 deletions
diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc
index db4b56b..e6f52d8 100644
--- a/gcc/config/arc/arc.cc
+++ b/gcc/config/arc/arc.cc
@@ -2474,6 +2474,20 @@ arc_setup_incoming_varargs (cumulative_args_t args_so_far,
}
}
+/* Return TRUE if reg is ok for short instrcutions. */
+
+static bool
+arc_check_short_reg_p (rtx op)
+{
+ if (!REG_P (op))
+ return false;
+
+ if (IN_RANGE (REGNO (op) ^ 4, 4, 11))
+ return true;
+
+ return false;
+}
+
/* Cost functions. */
/* Provide the costs of an addressing mode that contains ADDR.
@@ -2485,7 +2499,7 @@ arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed)
switch (GET_CODE (addr))
{
case REG :
- return speed || satisfies_constraint_Rcq (addr) ? 0 : 1;
+ return speed || arc_check_short_reg_p (addr) ? 0 : 1;
case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC:
case PRE_MODIFY: case POST_MODIFY:
return !speed;
@@ -2517,14 +2531,14 @@ arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed)
? COSTS_N_INSNS (1)
: speed
? 0
- : (satisfies_constraint_Rcq (plus0)
+ : (arc_check_short_reg_p (plus0)
&& satisfies_constraint_O (plus1))
? 0
: 1);
case REG:
return (speed < 1 ? 0
- : (satisfies_constraint_Rcq (plus0)
- && satisfies_constraint_Rcq (plus1))
+ : (arc_check_short_reg_p (plus0)
+ && arc_check_short_reg_p (plus1))
? 0 : 1);
case CONST :
case SYMBOL_REF :
@@ -3356,7 +3370,7 @@ arc_save_callee_enter (uint64_t gmask,
reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
mem = gen_frame_mem (Pmode, plus_constant (Pmode,
stack_pointer_rtx,
- off));
+ -off));
XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg);
RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
off -= UNITS_PER_WORD;
@@ -3370,7 +3384,7 @@ arc_save_callee_enter (uint64_t gmask,
reg = gen_rtx_REG (SImode, regno);
mem = gen_frame_mem (SImode, plus_constant (Pmode,
stack_pointer_rtx,
- off));
+ -off));
XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg);
RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1;
gmask = gmask & ~(1ULL << regno);
@@ -3380,7 +3394,7 @@ arc_save_callee_enter (uint64_t gmask,
{
mem = gen_frame_mem (Pmode, plus_constant (Pmode,
stack_pointer_rtx,
- off));
+ -off));
XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, hard_frame_pointer_rtx);
RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
off -= UNITS_PER_WORD;
@@ -9003,8 +9017,8 @@ arc_output_addsi (rtx *operands, bool cond_p, bool output_p)
int intval = (REG_P (operands[2]) ? 1
: CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057);
int neg_intval = -intval;
- int short_0 = satisfies_constraint_Rcq (operands[0]);
- int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1]));
+ int short_0 = arc_check_short_reg_p (operands[0]);
+ int short_p = (!cond_p && short_0 && arc_check_short_reg_p (operands[1]));
int ret = 0;
#define REG_H_P(OP) (REG_P (OP) && ((TARGET_V2 && REGNO (OP) <= 31 \
@@ -9037,7 +9051,7 @@ arc_output_addsi (rtx *operands, bool cond_p, bool output_p)
patterns. */
if (short_p
&& ((REG_H_P (operands[2])
- && (match || satisfies_constraint_Rcq (operands[2])))
+ && (match || arc_check_short_reg_p (operands[2])))
|| (CONST_INT_P (operands[2])
&& ((unsigned) intval <= (match ? 127 : 7)))))
ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;1");
@@ -9064,7 +9078,7 @@ arc_output_addsi (rtx *operands, bool cond_p, bool output_p)
/* Generate add_s r0,b,u6; add_s r1,b,u6 patterns. */
if (TARGET_CODE_DENSITY && REG_P (operands[0]) && REG_P (operands[1])
&& ((REGNO (operands[0]) == 0) || (REGNO (operands[0]) == 1))
- && satisfies_constraint_Rcq (operands[1])
+ && arc_check_short_reg_p (operands[1])
&& satisfies_constraint_L (operands[2]))
ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;6");
}
@@ -10033,7 +10047,7 @@ split_addsi (rtx *operands)
/* Try for two short insns first. Lengths being equal, we prefer
expansions with shorter register lifetimes. */
if (val > 127 && val <= 255
- && satisfies_constraint_Rcq (operands[0]))
+ && arc_check_short_reg_p (operands[0]))
{
operands[3] = operands[2];
operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
@@ -10057,8 +10071,8 @@ split_subsi (rtx *operands)
/* Try for two short insns first. Lengths being equal, we prefer
expansions with shorter register lifetimes. */
- if (satisfies_constraint_Rcq (operands[0])
- && satisfies_constraint_Rcq (operands[2]))
+ if (arc_check_short_reg_p (operands[0])
+ && arc_check_short_reg_p (operands[2]))
{
if (val >= -31 && val <= 127)
{
@@ -10436,12 +10450,12 @@ arc_lra_p (void)
return arc_lra_flag;
}
-/* ??? Should we define TARGET_REGISTER_PRIORITY? We might perfer to use
- Rcq registers, because some insn are shorter with them. OTOH we already
- have separate alternatives for this purpose, and other insns don't
- mind, so maybe we should rather prefer the other registers?
- We need more data, and we can only get that if we allow people to
- try all options. */
+/* ??? Should we define TARGET_REGISTER_PRIORITY? We might perfer to
+ use q registers, because some insn are shorter with them. OTOH we
+ already have separate alternatives for this purpose, and other
+ insns don't mind, so maybe we should rather prefer the other
+ registers? We need more data, and we can only get that if we allow
+ people to try all options. */
static int
arc_register_priority (int r)
{
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 7170445..458d3ed 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -696,15 +696,13 @@ archs4x, archs4xd"
; In order to allow the ccfsm machinery to do its work, the leading compact
; alternatives say 'canuse' - there is another alternative that will match
; when the condition codes are used.
-; Rcq won't match if the condition is actually used; to avoid a spurious match
-; via q, q is inactivated as constraint there.
; Likewise, the length of an alternative that might be shifted to conditional
; execution must reflect this, lest out-of-range branches are created.
; The iscompact attribute allows the epilogue expander to know for which
; insns it should lengthen the return insn.
(define_insn "*movqi_insn"
- [(set (match_operand:QI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w, w,???w,h, w,Rcq, S,!*x, r,r, Ucm,m,???m, m,Usc")
- (match_operand:QI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL, I,?Rac,i,?i, T,Rcq,Usd,Ucm,m,?Rac,c,?Rac,Cm3,i"))]
+ [(set (match_operand:QI 0 "move_dest_operand" "=q, q,r,q, h, w, w,???w,h, w,q,S,!*x, r,r, Ucm,m,???m, m,Usc")
+ (match_operand:QI 1 "move_src_operand" "rL,rP,q,P,hCm1,cL, I,?Rac,i,?i,T,q,Usd,Ucm,m,?Rac,c,?Rac,Cm3,i"))]
"register_operand (operands[0], QImode)
|| register_operand (operands[1], QImode)
|| (satisfies_constraint_Cm3 (operands[1])
@@ -742,8 +740,8 @@ archs4x, archs4xd"
"if (prepare_move_operands (operands, HImode)) DONE;")
(define_insn "*movhi_insn"
- [(set (match_operand:HI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w, w,???w,Rcq#q,h, w,Rcq, S, r,r, Ucm,m,???m, m,VUsc")
- (match_operand:HI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL, I,?Rac, i,i,?i, T,Rcq,Ucm,m,?Rac,c,?Rac,Cm3,i"))]
+ [(set (match_operand:HI 0 "move_dest_operand" "=q, q,r,q, h, w, w,???w,q,h, w,q,S, r,r, Ucm,m,???m, m,VUsc")
+ (match_operand:HI 1 "move_src_operand" " rL,rP,q,P,hCm1,cL, I,?Rac,i,i,?i,T,q,Ucm,m,?Rac,c,?Rac,Cm3,i"))]
"register_operand (operands[0], HImode)
|| register_operand (operands[1], HImode)
|| (CONSTANT_P (operands[1])
@@ -793,8 +791,8 @@ archs4x, archs4xd"
; the iscompact attribute allows the epilogue expander to know for which
; insns it should lengthen the return insn.
(define_insn_and_split "*movsi_insn" ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
- [(set (match_operand:SI 0 "move_dest_operand" "=q, q,r,q, h, rl,r, r, r, r, ?r, r, q, h, rl, q, S, Us<,RcqRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m, m,VUsc")
- (match_operand:SI 1 "move_src_operand" "rL,rP,q,P,hCm1,rLl,I,Clo,Chi,Cbi,Cpc,Clb,Cax,Cal,Cal,Uts,Rcq,RcqRck, Us>,Usd,Ucm, Usd, Ucd,m, r,!*Rzd,r,Cm3, C32"))]
+ [(set (match_operand:SI 0 "move_dest_operand" "=q, q,r,q, h, rl,r, r, r, r, ?r, r, q, h, rl, q, S, Us<,qRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m, m,VUsc")
+ (match_operand:SI 1 "move_src_operand" "rL,rP,q,P,hCm1,rLl,I,Clo,Chi,Cbi,Cpc,Clb,Cax,Cal,Cal,Uts,q,qRck, Us>,Usd,Ucm, Usd, Ucd,m, r,!*Rzd,r,Cm3, C32"))]
"register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode)
|| (CONSTANT_P (operands[1])
@@ -998,8 +996,8 @@ archs4x, archs4xd"
(match_operand 0 "cc_register" "")
(match_operator 4 "zn_compare_operator"
[(and:SI
- (match_operand:SI 1 "register_operand" "%Rcq,Rcq, c, c, c, c,Rrq,Rrq, c")
- (match_operand:SI 2 "nonmemory_operand" "Rcq,C0p,cI,C1p,Ccp,Chs,Cbf,Cbf,???Cal"))
+ (match_operand:SI 1 "register_operand" "%q, q, c, c, c, c, q, q, c")
+ (match_operand:SI 2 "nonmemory_operand" "q,C0p,cI,C1p,Ccp,Chs,Cbf,Cbf,???Cal"))
(const_int 0)]))
(clobber (match_scratch:SI 3 "=X,X,X,X,X,X,Rrq,1,c"))]
"TARGET_NPS_BITOPS"
@@ -1014,9 +1012,9 @@ archs4x, archs4xd"
(match_operator 3 "zn_compare_operator"
[(and:SI
(match_operand:SI 1 "register_operand"
- "%Rcq,Rcq, c, c, c, c, c, c")
+ "%q, q, c, c, c, c, c, c")
(match_operand:SI 2 "nonmemory_operand"
- " Rcq,C0p,cI,cL,C1p,Ccp,Chs,Cal"))
+ " q,C0p,cI,cL,C1p,Ccp,Chs,Cal"))
(const_int 0)]))]
"reload_completed
|| !satisfies_constraint_Cbf (operands[2])
@@ -1092,9 +1090,9 @@ archs4x, archs4xd"
[(set (match_operand:CC_ZN 0 "cc_set_register" "")
(match_operator 5 "zn_compare_operator"
[(zero_extract:SI
- (match_operand:SI 1 "register_operand" "%Rcqq,c, c,Rrq,c")
- (match_operand:SI 2 "const_int_operand" "N,N, n,Cbn,n")
- (match_operand:SI 3 "const_int_operand" "n,n,C_0,Cbn,n"))
+ (match_operand:SI 1 "register_operand" "%q,c, c,Rrq,c")
+ (match_operand:SI 2 "const_int_operand" "N,N, n,Cbn,n")
+ (match_operand:SI 3 "const_int_operand" "n,n,C_0,Cbn,n"))
(const_int 0)]))
(clobber (match_scratch:SI 4 "=X,X,X,Rrq,X"))]
""
@@ -1678,7 +1676,7 @@ archs4x, archs4xd"
""
{
if (rtx_equal_p (operands[1], const0_rtx) && GET_CODE (operands[3]) == NE
- && satisfies_constraint_Rcq (operands[0]))
+ && IN_RANGE (REGNO (operands[0]) ^ 4, 4, 11))
return "sub%?.ne %0,%0,%0";
/* ??? might be good for speed on ARC600 too, *if* properly scheduled. */
if ((optimize_size && (!TARGET_ARC600_FAMILY))
@@ -1980,8 +1978,8 @@ archs4x, archs4xd"
;; Absolute instructions
(define_insn "abssi2"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,w,w")
- (abs:SI (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,Cal")))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=q,w,w")
+ (abs:SI (match_operand:SI 1 "nonmemory_operand" "q,cL,Cal")))]
""
"abs%? %0,%1%&"
[(set_attr "type" "two_cycle_core")
@@ -1991,22 +1989,22 @@ archs4x, archs4xd"
;; Maximum and minimum insns
(define_insn "smaxsi3"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w")
- (smax:SI (match_operand:SI 1 "register_operand" "%0, c, c")
- (match_operand:SI 2 "nonmemory_operand" "cL,cL,Cal")))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r, r, r")
+ (smax:SI (match_operand:SI 1 "register_operand" "%0, r, r")
+ (match_operand:SI 2 "nonmemory_operand" "rL,rL,Cal")))]
""
- "max%? %0,%1,%2"
+ "max%?\\t%0,%1,%2"
[(set_attr "type" "two_cycle_core")
(set_attr "length" "4,4,8")
(set_attr "predicable" "yes,no,no")]
)
(define_insn "sminsi3"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w")
- (smin:SI (match_operand:SI 1 "register_operand" "%0, c, c")
- (match_operand:SI 2 "nonmemory_operand" "cL,cL,Cal")))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r, r, r")
+ (smin:SI (match_operand:SI 1 "register_operand" "%0, r, r")
+ (match_operand:SI 2 "nonmemory_operand" "rL,rL,Cal")))]
""
- "min%? %0,%1,%2"
+ "min%?\\t%0,%1,%2"
[(set_attr "type" "two_cycle_core")
(set_attr "length" "4,4,8")
(set_attr "predicable" "yes,no,no")]
@@ -2028,10 +2026,10 @@ archs4x, archs4xd"
; We avoid letting this pattern use LP_COUNT as a register by specifying
; register class 'W' instead of 'w'.
(define_insn_and_split "*addsi3_mixed"
- ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f 10 11 12
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,Rcq, h,!*Rsd,Rcq,Rcb,Rcq, Rcqq,Rcqq,Rcw,Rcw, Rcw, W, W,W, W,Rcqq,Rcw, W")
- (plus:SI (match_operand:SI 1 "register_operand" "%0, c, 0, Rcqq, 0, 0,Rcb, Rcqq, 0, 0, c, 0, c, c,0, 0, 0, 0, c")
- (match_operand:SI 2 "nonmemory_operand" "cL, 0, Cm1, L,CL2,Csp,CM4,RcqqK, cO, cL, 0,cCca,cLCmL,Cca,I,C2a, Cal,Cal,Cal")))]
+ ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f 10 11 12
+ [(set (match_operand:SI 0 "dest_reg_operand" "=q,q, h,!*Rsd, q,Rcb, q, q, q, r,r, r, W, W,W, W, q, r, W")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,c, 0, q, 0, 0,Rcb, q, 0, 0,r, 0, c, c,0, 0, 0, 0, c")
+ (match_operand:SI 2 "nonmemory_operand" "cL,0, Cm1, L,CL2,Csp,CM4,qK,cO,rL,0,rCca,cLCmL,Cca,I,C2a,Cal,Cal,Cal")))]
""
{
arc_output_addsi (operands, arc_ccfsm_cond_exec_p (), true);
@@ -2083,9 +2081,9 @@ archs4x, archs4xd"
])
(define_insn "mulhisi3_reg"
- [(set (match_operand:SI 0 "register_operand" "=Rcqq,r,r")
- (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" " 0,0,r"))
- (sign_extend:SI (match_operand:HI 2 "nonmemory_operand" "Rcqq,r,r"))))]
+ [(set (match_operand:SI 0 "register_operand" "=q,r,r")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "0,0,r"))
+ (sign_extend:SI (match_operand:HI 2 "nonmemory_operand" "q,r,r"))))]
"TARGET_MPYW"
"mpyw%? %0,%1,%2"
[(set_attr "length" "*,4,4")
@@ -2123,9 +2121,9 @@ archs4x, archs4xd"
])
(define_insn "umulhisi3_reg"
- [(set (match_operand:SI 0 "register_operand" "=Rcqq, r, r")
- (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" " %0, 0, r"))
- (zero_extend:SI (match_operand:HI 2 "register_operand" " Rcqq, r, r"))))]
+ [(set (match_operand:SI 0 "register_operand" "=q, r, r")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%0, 0, r"))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "q, r, r"))))]
"TARGET_MPYW"
"mpyuw%? %0,%1,%2"
[(set_attr "length" "*,4,4")
@@ -2246,8 +2244,8 @@ archs4x, archs4xd"
(define_insn "mulsi_600"
[(set (match_operand:SI 2 "mlo_operand" "")
- (mult:SI (match_operand:SI 0 "register_operand" "%Rcq#q,c,c,c")
- (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,I,Cal")))
+ (mult:SI (match_operand:SI 0 "register_operand" "%q,c,c,c")
+ (match_operand:SI 1 "nonmemory_operand" "q,cL,I,Cal")))
(clobber (match_operand:SI 3 "mhi_operand" ""))]
"TARGET_MUL64_SET"
"mul64%?\\t0,%0,%1"
@@ -2282,8 +2280,8 @@ archs4x, archs4xd"
(define_insn "mul64"
[(set (reg:DI MUL64_OUT_REG)
(mult:DI
- (sign_extend:DI (match_operand:SI 0 "register_operand" "%Rcq#q, c,c, c"))
- (sign_extend:DI (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,L,C32"))))]
+ (sign_extend:DI (match_operand:SI 0 "register_operand" "%q, c,c, c"))
+ (sign_extend:DI (match_operand:SI 1 "nonmemory_operand" "q,cL,L,C32"))))]
"TARGET_MUL64_SET"
"mul64%? \t0, %0, %1%&"
[(set_attr "length" "*,4,4,8")
@@ -2336,11 +2334,11 @@ archs4x, archs4xd"
; registers, since it cannot be the destination of a multi-cycle insn
; like MPY or MPYU.
(define_insn "mulsi3_700"
- [(set (match_operand:SI 0 "mpy_dest_reg_operand" "=Rcr,r,r,Rcr,r")
- (mult:SI (match_operand:SI 1 "register_operand" "%0,c,0,0,c")
- (match_operand:SI 2 "nonmemory_operand" "cL,cL,I,Cal,Cal")))]
+ [(set (match_operand:SI 0 "mpy_dest_reg_operand" "=r, r,r, r,r")
+ (mult:SI (match_operand:SI 1 "register_operand" "%0, r,0, 0,r")
+ (match_operand:SI 2 "nonmemory_operand" "rL,rL,I,Cal,Cal")))]
"TARGET_ARC700_MPY"
- "mpyu%? %0,%1,%2"
+ "mpyu%?\\t%0,%1,%2"
[(set_attr "length" "4,4,4,8,8")
(set_attr "type" "umulti")
(set_attr "predicable" "yes,no,no,yes,no")
@@ -2501,15 +2499,15 @@ archs4x, archs4xd"
(set_attr "length" "8")])
(define_insn "mulsi3_highpart"
- [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r")
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
(truncate:SI
(lshiftrt:DI
(mult:DI
- (sign_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c"))
- (sign_extend:DI (match_operand:SI 2 "extend_operand" "c,c, i,i")))
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "%0,r,0,r"))
+ (sign_extend:DI (match_operand:SI 2 "extend_operand" "r,r,i,i")))
(const_int 32))))]
"TARGET_MPY"
- "mpy%+%? %0,%1,%2"
+ "mpy%+%?\\t%0,%1,%2"
[(set_attr "length" "4,4,8,8")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no,yes,no")
@@ -2518,15 +2516,15 @@ archs4x, archs4xd"
; Note that mpyhu has the same latency as mpy / mpyh,
; thus we use the type multi.
(define_insn "*umulsi3_highpart_i"
- [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r")
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
(truncate:SI
(lshiftrt:DI
(mult:DI
- (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c"))
- (zero_extend:DI (match_operand:SI 2 "extend_operand" "c,c, i,i")))
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,r,0,r"))
+ (zero_extend:DI (match_operand:SI 2 "extend_operand" "r,r,i,i")))
(const_int 32))))]
"TARGET_MPY"
- "mpy%+u%? %0,%1,%2"
+ "mpy%+u%?\\t%0,%1,%2"
[(set_attr "length" "4,4,8,8")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no,yes,no")
@@ -2536,15 +2534,15 @@ archs4x, archs4xd"
;; need a separate pattern for immediates
;; ??? This is fine for combine, but not for reload.
(define_insn "umulsi3_highpart_int"
- [(set (match_operand:SI 0 "register_operand" "=Rcr, r, r,Rcr, r")
+ [(set (match_operand:SI 0 "register_operand" "=r, r, r,r, r")
(truncate:SI
(lshiftrt:DI
(mult:DI
- (zero_extend:DI (match_operand:SI 1 "register_operand" " 0, c, 0, 0, c"))
- (match_operand:DI 2 "immediate_usidi_operand" "L, L, I, Cal, Cal"))
+ (zero_extend:DI (match_operand:SI 1 "register_operand" " 0, r, 0, 0, r"))
+ (match_operand:DI 2 "immediate_usidi_operand" "L, L, I,Cal,Cal"))
(const_int 32))))]
"TARGET_MPY"
- "mpy%+u%? %0,%1,%2"
+ "mpy%+u%?\\t%0,%1,%2"
[(set_attr "length" "4,4,4,8,8")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no,no,yes,no")
@@ -2792,13 +2790,13 @@ archs4x, archs4xd"
(define_insn "*add_f_2"
[(set (reg:CC_C CC_REG)
(compare:CC_C
- (plus:SI (match_operand:SI 1 "register_operand" "c,0,c")
- (match_operand:SI 2 "nonmemory_operand" "cL,I,cCal"))
+ (plus:SI (match_operand:SI 1 "register_operand" "r ,0,r")
+ (match_operand:SI 2 "nonmemory_operand" "rL,I,rCal"))
(match_dup 2)))
- (set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w")
+ (set (match_operand:SI 0 "dest_reg_operand" "=r,r,r")
(plus:SI (match_dup 1) (match_dup 2)))]
""
- "add.f %0,%1,%2"
+ "add.f\\t%0,%1,%2"
[(set_attr "cond" "set")
(set_attr "type" "compare")
(set_attr "length" "4,4,8")])
@@ -2895,22 +2893,22 @@ archs4x, archs4xd"
; the casesi expander might generate a sub of zero, so we have to recognize it.
; combine should make such an insn go away.
(define_insn_and_split "subsi3_insn"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,Rcw,Rcw,w,w,w, w, w, w")
- (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,Rcqq, 0, cL,c,L,I,Cal,Cal, c")
- (match_operand:SI 2 "nonmemory_operand" "Rcqq,Rcqq, c, 0,c,c,0, 0, c,Cal")))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=q,q,r, r,r,r,r, r, r, r")
+ (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,q,0,rL,r,L,I,Cal,Cal, r")
+ (match_operand:SI 2 "nonmemory_operand" "q,q,r, 0,r,r,0, 0, r,Cal")))]
"register_operand (operands[1], SImode)
|| register_operand (operands[2], SImode)"
"@
- sub%? %0,%1,%2%&
- sub%? %0,%1,%2%&
- sub%? %0,%1,%2
- rsub%? %0,%2,%1
- sub %0,%1,%2
- rsub %0,%2,%1
- rsub %0,%2,%1
- rsub%? %0,%2,%1
- rsub %0,%2,%1
- sub %0,%1,%2"
+ sub%?\\t%0,%1,%2%&
+ sub%?\\t%0,%1,%2%&
+ sub%?\\t%0,%1,%2
+ rsub%?\\t%0,%2,%1
+ sub\\t%0,%1,%2
+ rsub\\t%0,%2,%1
+ rsub\\t%0,%2,%1
+ rsub%?\\t%0,%2,%1
+ rsub\\t%0,%2,%1
+ sub\\t%0,%1,%2"
"reload_completed && get_attr_length (insn) == 8
&& satisfies_constraint_I (operands[1])
&& GET_CODE (PATTERN (insn)) != COND_EXEC"
@@ -2990,19 +2988,19 @@ archs4x, archs4xd"
(define_insn "sub_f"
[(set (reg:CC CC_REG)
- (compare:CC (match_operand:SI 1 "nonmemory_operand" " c,L,0,I,c,Cal")
- (match_operand:SI 2 "nonmemory_operand" "cL,c,I,0,Cal,c")))
- (set (match_operand:SI 0 "dest_reg_operand" "=w,w,Rcw,Rcw,w,w")
+ (compare:CC (match_operand:SI 1 "nonmemory_operand" " r,L,0,I,r,Cal")
+ (match_operand:SI 2 "nonmemory_operand" "rL,r,I,0,Cal,r")))
+ (set (match_operand:SI 0 "dest_reg_operand" "=r,r,r,r,r,r")
(minus:SI (match_dup 1) (match_dup 2)))]
"register_operand (operands[1], SImode)
|| register_operand (operands[2], SImode)"
"@
- sub.f %0,%1,%2
- rsub.f %0,%2,%1
- sub.f %0,%1,%2
- rsub.f %0,%2,%1
- sub.f %0,%1,%2
- sub.f %0,%1,%2"
+ sub.f\\t%0,%1,%2
+ rsub.f\\t%0,%2,%1
+ sub.f\\t%0,%1,%2
+ rsub.f\\t%0,%2,%1
+ sub.f\\t%0,%1,%2
+ sub.f\\t%0,%1,%2"
[(set_attr "type" "compare")
(set_attr "length" "4,4,4,4,8,8")])
@@ -3051,12 +3049,12 @@ archs4x, archs4xd"
;; N.B. sub[123] has the operands of the MINUS in the opposite order from
;; what synth_mult likes.
(define_insn "*sub_n"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
- (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,c,?Cal")
- (ashift:SI (match_operand:SI 2 "register_operand" "c,c,c")
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r")
+ (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,r,?Cal")
+ (ashift:SI (match_operand:SI 2 "register_operand" "r,r,r")
(match_operand:SI 3 "_1_2_3_operand" ""))))]
""
- "sub%c3%? %0,%1,%2"
+ "sub%c3%?\\t%0,%1,%2"
[(set_attr "type" "shift")
(set_attr "length" "4,4,8")
(set_attr "predicable" "yes,no,no")
@@ -3064,12 +3062,12 @@ archs4x, archs4xd"
(set_attr "iscompact" "false")])
(define_insn "*sub_n"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
- (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,c,?Cal")
- (mult:SI (match_operand:SI 2 "register_operand" "c,c,c")
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r")
+ (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,r,?Cal")
+ (mult:SI (match_operand:SI 2 "register_operand" "r,r,r")
(match_operand:SI 3 "_2_4_8_operand" ""))))]
""
- "sub%z3%? %0,%1,%2"
+ "sub%z3%?\\t%0,%1,%2"
[(set_attr "type" "shift")
(set_attr "length" "4,4,8")
(set_attr "predicable" "yes,no,no")
@@ -3078,12 +3076,12 @@ archs4x, archs4xd"
; ??? check if combine matches this.
(define_insn "*bset"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r")
(ior:SI (ashift:SI (const_int 1)
- (match_operand:SI 1 "nonmemory_operand" "cL,cL,c"))
- (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))]
+ (match_operand:SI 1 "nonmemory_operand" "rL,rL,r"))
+ (match_operand:SI 2 "nonmemory_operand" "0,r,Cal")))]
""
- "bset%? %0,%2,%1"
+ "bset%?\\t%0,%2,%1"
[(set_attr "length" "4,4,8")
(set_attr "predicable" "yes,no,no")
(set_attr "cond" "canuse,nocond,nocond")]
@@ -3091,12 +3089,12 @@ archs4x, archs4xd"
; ??? check if combine matches this.
(define_insn "*bxor"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r")
(xor:SI (ashift:SI (const_int 1)
- (match_operand:SI 1 "nonmemory_operand" "cL,cL,c"))
- (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))]
+ (match_operand:SI 1 "nonmemory_operand" "rL,rL,r"))
+ (match_operand:SI 2 "nonmemory_operand" "0,r,Cal")))]
""
- "bxor%? %0,%2,%1"
+ "bxor%?\\t%0,%2,%1"
[(set_attr "length" "4,4,8")
(set_attr "predicable" "yes,no,no")
(set_attr "cond" "canuse,nocond,nocond")]
@@ -3104,12 +3102,12 @@ archs4x, archs4xd"
; ??? check if combine matches this.
(define_insn "*bclr"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r")
(and:SI (not:SI (ashift:SI (const_int 1)
- (match_operand:SI 1 "nonmemory_operand" "cL,cL,c")))
- (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))]
+ (match_operand:SI 1 "nonmemory_operand" "rL,rL,r")))
+ (match_operand:SI 2 "nonmemory_operand" "0,r,Cal")))]
""
- "bclr%? %0,%2,%1"
+ "bclr%?\\t%0,%2,%1"
[(set_attr "length" "4,4,8")
(set_attr "predicable" "yes,no,no")
(set_attr "cond" "canuse,nocond,nocond")]
@@ -3121,15 +3119,15 @@ archs4x, archs4xd"
; see also iorsi3 for use with constant bit number.
(define_insn "*bset_insn"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
- (ior:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal")
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r")
+ (ior:SI (match_operand:SI 1 "nonmemory_operand" "0,r,Cal")
(ashift:SI (const_int 1)
- (match_operand:SI 2 "nonmemory_operand" "cL,cL,c"))) ) ]
+ (match_operand:SI 2 "nonmemory_operand" "rL,rL,r"))) ) ]
""
"@
- bset%? %0,%1,%2 ;;peep2, constr 1
- bset %0,%1,%2 ;;peep2, constr 2
- bset %0,%1,%2 ;;peep2, constr 3"
+ bset%?\\t%0,%1,%2 ;;peep2, constr 1
+ bset\\t%0,%1,%2 ;;peep2, constr 2
+ bset\\t%0,%1,%2 ;;peep2, constr 3"
[(set_attr "length" "4,4,8")
(set_attr "predicable" "yes,no,no")
(set_attr "cond" "canuse,nocond,nocond")]
@@ -3137,15 +3135,15 @@ archs4x, archs4xd"
; see also xorsi3 for use with constant bit number.
(define_insn "*bxor_insn"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
- (xor:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal")
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r")
+ (xor:SI (match_operand:SI 1 "nonmemory_operand" "0,r,Cal")
(ashift:SI (const_int 1)
- (match_operand:SI 2 "nonmemory_operand" "cL,cL,c"))) ) ]
+ (match_operand:SI 2 "nonmemory_operand" "rL,rL,r"))) ) ]
""
"@
- bxor%? %0,%1,%2
- bxor %0,%1,%2
- bxor %0,%1,%2"
+ bxor%?\\t%0,%1,%2
+ bxor\\t%0,%1,%2
+ bxor\\t%0,%1,%2"
[(set_attr "length" "4,4,8")
(set_attr "predicable" "yes,no,no")
(set_attr "cond" "canuse,nocond,nocond")]
@@ -3153,15 +3151,15 @@ archs4x, archs4xd"
; see also andsi3 for use with constant bit number.
(define_insn "*bclr_insn"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r")
(and:SI (not:SI (ashift:SI (const_int 1)
- (match_operand:SI 2 "nonmemory_operand" "cL,rL,r")))
- (match_operand:SI 1 "nonmemory_operand" "0,c,Cal")))]
+ (match_operand:SI 2 "nonmemory_operand" "rL,rL,r")))
+ (match_operand:SI 1 "nonmemory_operand" "0,r,Cal")))]
""
"@
- bclr%? %0,%1,%2
- bclr %0,%1,%2
- bclr %0,%1,%2"
+ bclr%?\\t%0,%1,%2
+ bclr\\t%0,%1,%2
+ bclr\\t%0,%1,%2"
[(set_attr "length" "4,4,8")
(set_attr "predicable" "yes,no,no")
(set_attr "cond" "canuse,nocond,nocond")]
@@ -3169,17 +3167,17 @@ archs4x, archs4xd"
; see also andsi3 for use with constant bit number.
(define_insn "*bmsk_insn"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
- (and:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal")
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r")
+ (and:SI (match_operand:SI 1 "nonmemory_operand" "0,r,Cal")
(plus:SI (ashift:SI (const_int 1)
(plus:SI (match_operand:SI 2 "nonmemory_operand" "rL,rL,r")
(const_int 1)))
(const_int -1))))]
""
"@
- bmsk%? %0,%1,%2
- bmsk %0,%1,%2
- bmsk %0,%1,%2"
+ bmsk%?\\t%0,%1,%2
+ bmsk\\t%0,%1,%2
+ bmsk\\t%0,%1,%2"
[(set_attr "length" "4,4,8")
(set_attr "predicable" "yes,no,no")
(set_attr "cond" "canuse,nocond,nocond")]
@@ -3282,18 +3280,18 @@ archs4x, archs4xd"
;;bic define_insn that allows limm to be the first operand
(define_insn "*bicsi3_insn"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,Rcw,Rcw,w,w,w")
- (and:SI (not:SI (match_operand:SI 1 "nonmemory_operand" "Rcqq,Lc,I,Cal,Lc,Cal,c"))
- (match_operand:SI 2 "nonmemory_operand" "0,0,0,0,c,c,Cal")))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=q,r,r,r,r,r,r")
+ (and:SI (not:SI (match_operand:SI 1 "nonmemory_operand" "q,Lr,I,Cal,Lr,Cal,r"))
+ (match_operand:SI 2 "nonmemory_operand" "0,0,0,0,r,r,Cal")))]
""
"@
- bic%? %0, %2, %1%& ;;constraint 0
- bic%? %0,%2,%1 ;;constraint 1
- bic %0,%2,%1 ;;constraint 2, FIXME: will it ever get generated ???
- bic%? %0,%2,%1 ;;constraint 3, FIXME: will it ever get generated ???
- bic %0,%2,%1 ;;constraint 4
- bic %0,%2,%1 ;;constraint 5, FIXME: will it ever get generated ???
- bic %0,%2,%1 ;;constraint 6"
+ bic%?\\t%0, %2, %1%& ;;constraint 0
+ bic%?\\t%0,%2,%1 ;;constraint 1
+ bic\\t%0,%2,%1 ;;constraint 2, FIXME: will it ever get generated ???
+ bic%?\\t%0,%2,%1 ;;constraint 3, FIXME: will it ever get generated ???
+ bic\\t%0,%2,%1 ;;constraint 4
+ bic\\t%0,%2,%1 ;;constraint 5, FIXME: will it ever get generated ???
+ bic\\t%0,%2,%1 ;;constraint 6"
[(set_attr "length" "*,4,4,8,4,8,8")
(set_attr "iscompact" "maybe, false, false, false, false, false, false")
(set_attr "predicable" "no,yes,no,yes,no,no,no")
@@ -3334,19 +3332,19 @@ archs4x, archs4xd"
(set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,nocond,canuse,nocond")])
(define_insn "xorsi3"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcw,Rcw,Rcw,Rcw, w, w,w, w, w")
- (xor:SI (match_operand:SI 1 "register_operand" "%0, Rcq, 0, c, 0, 0, c, c,0, 0, c")
- (match_operand:SI 2 "nonmemory_operand" " Rcqq, 0, cL, 0,C0p, I,cL,C0p,I,Cal,Cal")))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=q,q, r,r, r,r, r, r,r, r, r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0,q, 0,r, 0,0, r, r,0, 0, r")
+ (match_operand:SI 2 "nonmemory_operand" "q,0,rL,0,C0p,I,rL,C0p,I,Cal,Cal")))]
""
"*
switch (which_alternative)
{
case 0: case 2: case 5: case 6: case 8: case 9: case 10:
- return \"xor%? %0,%1,%2%&\";
+ return \"xor%?\\t%0,%1,%2%&\";
case 1: case 3:
- return \"xor%? %0,%2,%1%&\";
+ return \"xor%?\\t%0,%2,%1%&\";
case 4: case 7:
- return \"bxor%? %0,%1,%z2\";
+ return \"bxor%?\\t%0,%1,%z2\";
default:
gcc_unreachable ();
}
@@ -3358,17 +3356,17 @@ archs4x, archs4xd"
(set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,canuse,nocond")])
(define_insn "negsi2"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,Rcw,w")
- (neg:SI (match_operand:SI 1 "register_operand" "0,Rcqq,0,c")))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=q,q,r,r")
+ (neg:SI (match_operand:SI 1 "register_operand" "0,q,0,r")))]
""
- "neg%? %0,%1%&"
+ "neg%?\\t%0,%1%&"
[(set_attr "type" "unary")
(set_attr "iscompact" "maybe,true,false,false")
(set_attr "predicable" "no,no,yes,no")])
(define_insn "one_cmplsi2"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w")
- (not:SI (match_operand:SI 1 "register_operand" "Rcqq,c")))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=q,w")
+ (not:SI (match_operand:SI 1 "register_operand" "q,c")))]
""
"not%? %0,%1%&"
[(set_attr "type" "unary,unary")
@@ -3498,14 +3496,14 @@ archs4x, archs4xd"
(set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")])
(define_insn "*lshrsi3_insn"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w")
- (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal")
- (match_operand:SI 2 "nonmemory_operand" "N, N,RcqqM, cL,cL,cCal")))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=q,q, q, r, r, r")
+ (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,q, 0, 0, r,rCal")
+ (match_operand:SI 2 "nonmemory_operand" "N,N,qM,rL,rL,rCal")))]
"TARGET_BARREL_SHIFTER
&& (register_operand (operands[1], SImode)
|| register_operand (operands[2], SImode))"
"*return (which_alternative <= 1 && !arc_ccfsm_cond_exec_p ()
- ? \"lsr%? %0,%1%&\" : \"lsr%? %0,%1,%2%&\");"
+ ? \"lsr%?\\t%0,%1%&\" : \"lsr%?\\t%0,%1,%2%&\");"
[(set_attr "type" "shift")
(set_attr "iscompact" "maybe,maybe,maybe,false,false,false")
(set_attr "predicable" "no,no,no,yes,no,no")
@@ -3546,8 +3544,8 @@ archs4x, archs4xd"
;; modifed cc user if second, but not first operand is a compact register.
(define_insn "cmpsi_cc_insn_mixed"
[(set (reg:CC CC_REG)
- (compare:CC (match_operand:SI 0 "register_operand" "Rcq#q,Rcqq, h, c, c,qRcq,c")
- (match_operand:SI 1 "nonmemory_operand" "cO, hO,Cm1,cI,cL, Cal,Cal")))]
+ (compare:CC (match_operand:SI 0 "register_operand" "q, q, h, c, c, q,c")
+ (match_operand:SI 1 "nonmemory_operand" "cO,hO,Cm1,cI,cL,Cal,Cal")))]
""
"cmp%? %0,%B1%&"
[(set_attr "type" "compare")
@@ -3559,7 +3557,7 @@ archs4x, archs4xd"
(define_insn "*cmpsi_cc_zn_insn"
[(set (reg:CC_ZN CC_REG)
- (compare:CC_ZN (match_operand:SI 0 "register_operand" "qRcq,c")
+ (compare:CC_ZN (match_operand:SI 0 "register_operand" "q,c")
(const_int 0)))]
""
"tst%? %0,%0%&"
@@ -3573,7 +3571,7 @@ archs4x, archs4xd"
(define_insn "*btst"
[(set (reg:CC_ZN CC_REG)
(compare:CC_ZN
- (zero_extract:SI (match_operand:SI 0 "register_operand" "Rcqq,c")
+ (zero_extract:SI (match_operand:SI 0 "register_operand" "q,c")
(const_int 1)
(match_operand:SI 1 "nonmemory_operand" "L,Lc"))
(const_int 0)))]
@@ -3618,7 +3616,7 @@ archs4x, archs4xd"
(define_insn "*cmpsi_cc_z_insn"
[(set (reg:CC_Z CC_REG)
- (compare:CC_Z (match_operand:SI 0 "register_operand" "qRcq,c")
+ (compare:CC_Z (match_operand:SI 0 "register_operand" "q,c")
(match_operand:SI 1 "p2_immediate_operand" "O,n")))]
""
"@
@@ -3631,8 +3629,8 @@ archs4x, archs4xd"
(define_insn "*cmpsi_cc_c_insn"
[(set (reg:CC_C CC_REG)
- (compare:CC_C (match_operand:SI 0 "register_operand" "Rcqq,Rcqq, h, c,Rcqq, c")
- (match_operand:SI 1 "nonmemory_operand" "cO, hO,Cm1,cI, Cal,Cal")))]
+ (compare:CC_C (match_operand:SI 0 "register_operand" "q, q, h, c, q, c")
+ (match_operand:SI 1 "nonmemory_operand" "cO,hO,Cm1,cI,Cal,Cal")))]
""
"cmp%? %0,%1%&"
[(set_attr "type" "compare")
@@ -3944,7 +3942,7 @@ archs4x, archs4xd"
(const_int 2)))])
(define_insn "indirect_jump"
- [(set (pc) (match_operand:SI 0 "nonmemory_operand" "L,I,Cal,Rcqq,r"))]
+ [(set (pc) (match_operand:SI 0 "nonmemory_operand" "L,I,Cal,q,r"))]
""
"@
j%!%* %0%&
@@ -4076,7 +4074,7 @@ archs4x, archs4xd"
; Unlike the canonical tablejump, this pattern always uses a jump address,
; even for CASE_VECTOR_PC_RELATIVE.
(define_insn "casesi_jump"
- [(set (pc) (match_operand:SI 0 "register_operand" "Cal,Rcqq,c"))
+ [(set (pc) (match_operand:SI 0 "register_operand" "Cal,q,c"))
(use (label_ref (match_operand 1 "" "")))]
""
"j%!%* [%0]%&"
@@ -4106,18 +4104,16 @@ archs4x, archs4xd"
}
")
-; Rcq, which is used in alternative 0, checks for conditional execution.
; At instruction output time, if it doesn't match and we end up with
; alternative 1 ("q"), that means that we can't use the short form.
(define_insn "*call_i"
[(call (mem:SI (match_operand:SI 0
- "call_address_operand" "Rcq,q,c,Cji,Csc,Cbp,Cbr,L,I,Cal"))
+ "call_address_operand" "q,c,Cji,Csc,Cbp,Cbr,L,I,Cal"))
(match_operand 1 "" ""))
(clobber (reg:SI 31))]
""
"@
jl%!%* [%0]%&
- jl%!%* [%0]%&
jl%!%* [%0]
jli_s %S0
sjli %S0
@@ -4126,10 +4122,10 @@ archs4x, archs4xd"
jl%!%* %0
jl%* %0
jl%! %0"
- [(set_attr "type" "call,call,call,call_no_delay_slot,call_no_delay_slot,call,call,call,call,call_no_delay_slot")
- (set_attr "iscompact" "maybe,false,*,true,*,*,*,*,*,*")
- (set_attr "predicable" "no,no,yes,no,no,yes,no,yes,no,yes")
- (set_attr "length" "*,*,4,2,4,4,4,4,4,8")])
+ [(set_attr "type" "call,call,call_no_delay_slot,call_no_delay_slot,call,call,call,call,call_no_delay_slot")
+ (set_attr "iscompact" "maybe,*,true,*,*,*,*,*,*")
+ (set_attr "predicable" "no,yes,no,no,yes,no,yes,no,yes")
+ (set_attr "length" "*,4,2,4,4,4,4,4,8")])
(define_expand "call_value"
;; operand 2 is stack_size_rtx
@@ -4151,19 +4147,17 @@ archs4x, archs4xd"
XEXP (operands[1], 0) = force_reg (Pmode, callee);
}")
-; Rcq, which is used in alternative 0, checks for conditional execution.
; At instruction output time, if it doesn't match and we end up with
; alternative 1 ("q"), that means that we can't use the short form.
(define_insn "*call_value_i"
- [(set (match_operand 0 "dest_reg_operand" "=Rcq,q,w, w, w, w, w,w,w, w")
+ [(set (match_operand 0 "dest_reg_operand" "=q,w, w, w, w, w,w,w, w")
(call (mem:SI (match_operand:SI 1
- "call_address_operand" "Rcq,q,c,Cji,Csc,Cbp,Cbr,L,I,Cal"))
+ "call_address_operand" "q,c,Cji,Csc,Cbp,Cbr,L,I,Cal"))
(match_operand 2 "" "")))
(clobber (reg:SI 31))]
""
"@
jl%!%* [%1]%&
- jl%!%* [%1]%&
jl%!%* [%1]
jli_s %S1
sjli %S1
@@ -4172,10 +4166,10 @@ archs4x, archs4xd"
jl%!%* %1
jl%* %1
jl%! %1"
- [(set_attr "type" "call,call,call,call_no_delay_slot,call_no_delay_slot,call,call,call,call,call_no_delay_slot")
- (set_attr "iscompact" "maybe,false,*,true,false,*,*,*,*,*")
- (set_attr "predicable" "no,no,yes,no,no,yes,no,yes,no,yes")
- (set_attr "length" "*,*,4,2,4,4,4,4,4,8")])
+ [(set_attr "type" "call,call,call_no_delay_slot,call_no_delay_slot,call,call,call,call,call_no_delay_slot")
+ (set_attr "iscompact" "maybe,*,true,false,*,*,*,*,*")
+ (set_attr "predicable" "no,yes,no,no,yes,no,yes,no,yes")
+ (set_attr "length" "*,4,2,4,4,4,4,4,8")])
; There is a bl_s instruction (16 bit opcode branch-and-link), but we can't
; use it for lack of inter-procedural branch shortening.
@@ -4943,7 +4937,7 @@ archs4x, archs4xd"
[(set (pc)
(if_then_else
(match_operator 3 "equality_comparison_operator"
- [(zero_extract:SI (match_operand:SI 1 "register_operand" "Rcqq,c")
+ [(zero_extract:SI (match_operand:SI 1 "register_operand" "q,c")
(const_int 1)
(match_operand:SI 2 "nonmemory_operand" "L,Lc"))
(const_int 0)])
@@ -5153,20 +5147,20 @@ archs4x, archs4xd"
(set_attr "predicable" "yes")])
(define_insn "abssf2"
- [(set (match_operand:SF 0 "dest_reg_operand" "=Rcq#q,Rcw,w")
- (abs:SF (match_operand:SF 1 "register_operand" "0, 0,c")))]
+ [(set (match_operand:SF 0 "dest_reg_operand" "=q,r,r")
+ (abs:SF (match_operand:SF 1 "register_operand" "0,0,r")))]
""
- "bclr%? %0,%1,31%&"
+ "bclr%?\\t%0,%1,31%&"
[(set_attr "type" "unary")
(set_attr "iscompact" "maybe,false,false")
(set_attr "length" "2,4,4")
(set_attr "predicable" "no,yes,no")])
(define_insn "negsf2"
- [(set (match_operand:SF 0 "dest_reg_operand" "=Rcw,w")
- (neg:SF (match_operand:SF 1 "register_operand" "0,c")))]
+ [(set (match_operand:SF 0 "dest_reg_operand" "=r,r")
+ (neg:SF (match_operand:SF 1 "register_operand" "0,r")))]
""
- "bxor%? %0,%1,31"
+ "bxor%?\\t%0,%1,31"
[(set_attr "type" "unary")
(set_attr "predicable" "yes,no")])
@@ -5966,8 +5960,8 @@ archs4x, archs4xd"
(set_attr "length" "4")])
(define_insn "*ashlsi2_cnt1"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w")
- (ashift:SI (match_operand:SI 1 "register_operand" "Rcqq,c")
+ [(set (match_operand:SI 0 "dest_reg_operand" "=q,w")
+ (ashift:SI (match_operand:SI 1 "register_operand" "q,c")
(const_int 1)))]
""
"asl%? %0,%1%&"
@@ -5999,8 +5993,8 @@ archs4x, archs4xd"
(set_attr "predicable" "no")])
(define_insn "*lshrsi3_cnt1"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w")
- (lshiftrt:SI (match_operand:SI 1 "register_operand" "Rcqq,c")
+ [(set (match_operand:SI 0 "dest_reg_operand" "=q,w")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "q,c")
(const_int 1)))]
""
"lsr%? %0,%1%&"
@@ -6009,8 +6003,8 @@ archs4x, archs4xd"
(set_attr "predicable" "no,no")])
(define_insn "*ashrsi3_cnt1"
- [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w")
- (ashiftrt:SI (match_operand:SI 1 "register_operand" "Rcqq,c")
+ [(set (match_operand:SI 0 "dest_reg_operand" "=q,w")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "q,c")
(const_int 1)))]
""
"asr%? %0,%1%&"
@@ -6141,7 +6135,7 @@ archs4x, archs4xd"
(set_attr "length" "36")])
(define_insn "macd"
- [(set (match_operand:DI 0 "even_register_operand" "=Rcr,r,r")
+ [(set (match_operand:DI 0 "even_register_operand" "=r,r,r")
(plus:DI
(mult:DI
(sign_extend:DI (match_operand:SI 1 "register_operand" "%0,r,r"))
@@ -6243,7 +6237,7 @@ archs4x, archs4xd"
(set_attr "length" "36")])
(define_insn "macdu"
- [(set (match_operand:DI 0 "even_register_operand" "=Rcr,r,r")
+ [(set (match_operand:DI 0 "even_register_operand" "=r,r,r")
(plus:DI
(mult:DI
(zero_extend:DI (match_operand:SI 1 "register_operand" "%0,r,r"))
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index 0add5a2..b5827325 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -308,12 +308,14 @@ Target Ignore
Does nothing. Preserved for backward compatibility.
mRcq
-Target Var(TARGET_Rcq)
-Enable Rcq constraint handling - most short code generation depends on this.
+Target Ignore
+Does nothing. Preserved for backward compatibility.
+
mRcw
-Target Var(TARGET_Rcw)
-Enable Rcw constraint handling - ccfsm condexec mostly depends on this.
+Target Ignore
+Does nothing. Preserved for backward compatibility.
+
mearly-cbranchsi
Target Var(TARGET_EARLY_CBRANCHSI)
diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md
index 02aa37f..38bda12 100644
--- a/gcc/config/arc/constraints.md
+++ b/gcc/config/arc/constraints.md
@@ -432,50 +432,6 @@
&& !arc_legitimate_pic_addr_p (op)
&& !(satisfies_constraint_I (op) && optimize_size)"))
-; Note that the 'cryptic' register constraints will not make reload use the
-; associated class to reload into, but this will not penalize reloading of any
-; other operands, or using an alternate part of the same alternative.
-
-; Rcq is different in three important ways from a register class constraint:
-; - It does not imply a register class, hence reload will not use it to drive
-; reloads.
-; - It matches even when there is no register class to describe its accepted
-; set; not having such a set again lessens the impact on register allocation.
-; - It won't match when the instruction is conditionalized by the ccfsm.
-(define_constraint "Rcq"
- "@internal
- Cryptic q - for short insn generation while not affecting register allocation
- Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3},
- @code{r12}-@code{r15}"
- (and (match_code "reg")
- (match_test "TARGET_Rcq
- && !arc_ccfsm_cond_exec_p ()
- && IN_RANGE (REGNO (op) ^ 4, 4, 11)")))
-
-; If we need a reload, we generally want to steer reload to use three-address
-; alternatives in preference of two-address alternatives, unless the
-; three-address alternative introduces a LIMM that is unnecessary for the
-; two-address alternative.
-(define_constraint "Rcw"
- "@internal
- Cryptic w - for use in early alternatives with matching constraint"
- (and (match_code "reg")
- (match_test
- "TARGET_Rcw
- && REGNO (op) < FIRST_PSEUDO_REGISTER
- && TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS],
- REGNO (op))")))
-
-(define_constraint "Rcr"
- "@internal
- Cryptic r - for use in early alternatives with matching constraint"
- (and (match_code "reg")
- (match_test
- "TARGET_Rcw
- && REGNO (op) < FIRST_PSEUDO_REGISTER
- && TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS],
- REGNO (op))")))
-
(define_constraint "Rcb"
"@internal
Stack Pointer register @code{r28} - do not reload into its class"
diff --git a/gcc/config/gcn/gcn-modes.def b/gcc/config/gcn/gcn-modes.def
index 82585de..1b8a320 100644
--- a/gcc/config/gcn/gcn-modes.def
+++ b/gcc/config/gcn/gcn-modes.def
@@ -29,6 +29,48 @@ VECTOR_MODE (FLOAT, HF, 64); /* V64HF */
VECTOR_MODE (FLOAT, SF, 64); /* V64SF */
VECTOR_MODE (FLOAT, DF, 64); /* V64DF */
+/* Artificial vector modes, for when vector masking doesn't work (yet). */
+VECTOR_MODE (INT, QI, 32); /* V32QI */
+VECTOR_MODE (INT, HI, 32); /* V32HI */
+VECTOR_MODE (INT, SI, 32); /* V32SI */
+VECTOR_MODE (INT, DI, 32); /* V32DI */
+VECTOR_MODE (INT, TI, 32); /* V32TI */
+VECTOR_MODE (FLOAT, HF, 32); /* V32HF */
+VECTOR_MODE (FLOAT, SF, 32); /* V32SF */
+VECTOR_MODE (FLOAT, DF, 32); /* V32DF */
+VECTOR_MODE (INT, QI, 16); /* V16QI */
+VECTOR_MODE (INT, HI, 16); /* V16HI */
+VECTOR_MODE (INT, SI, 16); /* V16SI */
+VECTOR_MODE (INT, DI, 16); /* V16DI */
+VECTOR_MODE (INT, TI, 16); /* V16TI */
+VECTOR_MODE (FLOAT, HF, 16); /* V16HF */
+VECTOR_MODE (FLOAT, SF, 16); /* V16SF */
+VECTOR_MODE (FLOAT, DF, 16); /* V16DF */
+VECTOR_MODE (INT, QI, 8); /* V8QI */
+VECTOR_MODE (INT, HI, 8); /* V8HI */
+VECTOR_MODE (INT, SI, 8); /* V8SI */
+VECTOR_MODE (INT, DI, 8); /* V8DI */
+VECTOR_MODE (INT, TI, 8); /* V8TI */
+VECTOR_MODE (FLOAT, HF, 8); /* V8HF */
+VECTOR_MODE (FLOAT, SF, 8); /* V8SF */
+VECTOR_MODE (FLOAT, DF, 8); /* V8DF */
+VECTOR_MODE (INT, QI, 4); /* V4QI */
+VECTOR_MODE (INT, HI, 4); /* V4HI */
+VECTOR_MODE (INT, SI, 4); /* V4SI */
+VECTOR_MODE (INT, DI, 4); /* V4DI */
+VECTOR_MODE (INT, TI, 4); /* V4TI */
+VECTOR_MODE (FLOAT, HF, 4); /* V4HF */
+VECTOR_MODE (FLOAT, SF, 4); /* V4SF */
+VECTOR_MODE (FLOAT, DF, 4); /* V4DF */
+VECTOR_MODE (INT, QI, 2); /* V2QI */
+VECTOR_MODE (INT, HI, 2); /* V2HI */
+VECTOR_MODE (INT, SI, 2); /* V2SI */
+VECTOR_MODE (INT, DI, 2); /* V2DI */
+VECTOR_MODE (INT, TI, 2); /* V2TI */
+VECTOR_MODE (FLOAT, HF, 2); /* V2HF */
+VECTOR_MODE (FLOAT, SF, 2); /* V2SF */
+VECTOR_MODE (FLOAT, DF, 2); /* V2DF */
+
/* Vector units handle reads independently and thus no large alignment
needed. */
ADJUST_ALIGNMENT (V64QI, 1);
@@ -39,3 +81,43 @@ ADJUST_ALIGNMENT (V64TI, 16);
ADJUST_ALIGNMENT (V64HF, 2);
ADJUST_ALIGNMENT (V64SF, 4);
ADJUST_ALIGNMENT (V64DF, 8);
+ADJUST_ALIGNMENT (V32QI, 1);
+ADJUST_ALIGNMENT (V32HI, 2);
+ADJUST_ALIGNMENT (V32SI, 4);
+ADJUST_ALIGNMENT (V32DI, 8);
+ADJUST_ALIGNMENT (V32TI, 16);
+ADJUST_ALIGNMENT (V32HF, 2);
+ADJUST_ALIGNMENT (V32SF, 4);
+ADJUST_ALIGNMENT (V32DF, 8);
+ADJUST_ALIGNMENT (V16QI, 1);
+ADJUST_ALIGNMENT (V16HI, 2);
+ADJUST_ALIGNMENT (V16SI, 4);
+ADJUST_ALIGNMENT (V16DI, 8);
+ADJUST_ALIGNMENT (V16TI, 16);
+ADJUST_ALIGNMENT (V16HF, 2);
+ADJUST_ALIGNMENT (V16SF, 4);
+ADJUST_ALIGNMENT (V16DF, 8);
+ADJUST_ALIGNMENT (V8QI, 1);
+ADJUST_ALIGNMENT (V8HI, 2);
+ADJUST_ALIGNMENT (V8SI, 4);
+ADJUST_ALIGNMENT (V8DI, 8);
+ADJUST_ALIGNMENT (V8TI, 16);
+ADJUST_ALIGNMENT (V8HF, 2);
+ADJUST_ALIGNMENT (V8SF, 4);
+ADJUST_ALIGNMENT (V8DF, 8);
+ADJUST_ALIGNMENT (V4QI, 1);
+ADJUST_ALIGNMENT (V4HI, 2);
+ADJUST_ALIGNMENT (V4SI, 4);
+ADJUST_ALIGNMENT (V4DI, 8);
+ADJUST_ALIGNMENT (V4TI, 16);
+ADJUST_ALIGNMENT (V4HF, 2);
+ADJUST_ALIGNMENT (V4SF, 4);
+ADJUST_ALIGNMENT (V4DF, 8);
+ADJUST_ALIGNMENT (V2QI, 1);
+ADJUST_ALIGNMENT (V2HI, 2);
+ADJUST_ALIGNMENT (V2SI, 4);
+ADJUST_ALIGNMENT (V2DI, 8);
+ADJUST_ALIGNMENT (V2TI, 16);
+ADJUST_ALIGNMENT (V2HF, 2);
+ADJUST_ALIGNMENT (V2SF, 4);
+ADJUST_ALIGNMENT (V2DF, 8);
diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h
index ca80460..f9a1fc0 100644
--- a/gcc/config/gcn/gcn-protos.h
+++ b/gcc/config/gcn/gcn-protos.h
@@ -24,6 +24,8 @@ extern bool gcn_constant64_p (rtx);
extern bool gcn_constant_p (rtx);
extern rtx gcn_convert_mask_mode (rtx reg);
extern unsigned int gcn_dwarf_register_number (unsigned int regno);
+extern rtx get_exec (int64_t);
+extern rtx get_exec (machine_mode mode);
extern char * gcn_expand_dpp_shr_insn (machine_mode, const char *, int, int);
extern void gcn_expand_epilogue ();
extern rtx gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets,
@@ -34,8 +36,6 @@ extern rtx gcn_expand_scalar_to_vector_address (machine_mode, rtx, rtx, rtx);
extern void gcn_expand_vector_init (rtx, rtx);
extern bool gcn_flat_address_p (rtx, machine_mode);
extern bool gcn_fp_constant_p (rtx, bool);
-extern rtx gcn_full_exec ();
-extern rtx gcn_full_exec_reg ();
extern rtx gcn_gen_undef (machine_mode);
extern bool gcn_global_address_p (rtx);
extern tree gcn_goacc_adjust_private_decl (location_t, tree var, int level);
@@ -67,8 +67,6 @@ extern rtx gcn_operand_part (machine_mode, rtx, int);
extern bool gcn_regno_mode_code_ok_for_base_p (int, machine_mode,
addr_space_t, int, int);
extern reg_class gcn_regno_reg_class (int regno);
-extern rtx gcn_scalar_exec ();
-extern rtx gcn_scalar_exec_reg ();
extern bool gcn_scalar_flat_address_p (rtx);
extern bool gcn_scalar_flat_mem_p (rtx);
extern bool gcn_sgpr_move_p (rtx, rtx);
@@ -105,9 +103,11 @@ extern gimple_opt_pass *make_pass_omp_gcn (gcc::context *ctxt);
inline bool
vgpr_1reg_mode_p (machine_mode mode)
{
- return (mode == SImode || mode == SFmode || mode == HImode || mode == QImode
- || mode == V64QImode || mode == V64HImode || mode == V64SImode
- || mode == V64HFmode || mode == V64SFmode || mode == BImode);
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+
+ return (mode == SImode || mode == SFmode || mode == HImode || mode == HFmode
+ || mode == QImode || mode == BImode);
}
/* Return true if MODE is valid for 1 SGPR register. */
@@ -124,8 +124,10 @@ sgpr_1reg_mode_p (machine_mode mode)
inline bool
vgpr_2reg_mode_p (machine_mode mode)
{
- return (mode == DImode || mode == DFmode
- || mode == V64DImode || mode == V64DFmode);
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+
+ return (mode == DImode || mode == DFmode);
}
/* Return true if MODE can be handled directly by VGPR operations. */
@@ -133,9 +135,7 @@ vgpr_2reg_mode_p (machine_mode mode)
inline bool
vgpr_vector_mode_p (machine_mode mode)
{
- return (mode == V64QImode || mode == V64HImode
- || mode == V64SImode || mode == V64DImode
- || mode == V64HFmode || mode == V64SFmode || mode == V64DFmode);
+ return VECTOR_MODE_P (mode);
}
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index dec81e8..00c0e3b 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -17,88 +17,243 @@
;; {{{ Vector iterators
; Vector modes for specific types
-; (This will make more sense when there are multiple vector sizes)
(define_mode_iterator V_QI
- [V64QI])
+ [V2QI V4QI V8QI V16QI V32QI V64QI])
(define_mode_iterator V_HI
- [V64HI])
+ [V2HI V4HI V8HI V16HI V32HI V64HI])
(define_mode_iterator V_HF
- [V64HF])
+ [V2HF V4HF V8HF V16HF V32HF V64HF])
(define_mode_iterator V_SI
- [V64SI])
+ [V2SI V4SI V8SI V16SI V32SI V64SI])
(define_mode_iterator V_SF
- [V64SF])
+ [V2SF V4SF V8SF V16SF V32SF V64SF])
(define_mode_iterator V_DI
- [V64DI])
+ [V2DI V4DI V8DI V16DI V32DI V64DI])
(define_mode_iterator V_DF
- [V64DF])
+ [V2DF V4DF V8DF V16DF V32DF V64DF])
+
+(define_mode_iterator V64_SI
+ [V64SI])
+(define_mode_iterator V64_DI
+ [V64DI])
; Vector modes for sub-dword modes
(define_mode_iterator V_QIHI
- [V64QI V64HI])
+ [V2QI V2HI
+ V4QI V4HI
+ V8QI V8HI
+ V16QI V16HI
+ V32QI V32HI
+ V64QI V64HI])
; Vector modes for one vector register
(define_mode_iterator V_1REG
- [V64QI V64HI V64SI V64HF V64SF])
+ [V2QI V2HI V2SI V2HF V2SF
+ V4QI V4HI V4SI V4HF V4SF
+ V8QI V8HI V8SI V8HF V8SF
+ V16QI V16HI V16SI V16HF V16SF
+ V32QI V32HI V32SI V32HF V32SF
+ V64QI V64HI V64SI V64HF V64SF])
(define_mode_iterator V_INT_1REG
- [V64QI V64HI V64SI])
+ [V2QI V2HI V2SI
+ V4QI V4HI V4SI
+ V8QI V8HI V8SI
+ V16QI V16HI V16SI
+ V32QI V32HI V32SI
+ V64QI V64HI V64SI])
(define_mode_iterator V_INT_1REG_ALT
- [V64QI V64HI V64SI])
+ [V2QI V2HI V2SI
+ V4QI V4HI V4SI
+ V8QI V8HI V8SI
+ V16QI V16HI V16SI
+ V32QI V32HI V32SI
+ V64QI V64HI V64SI])
(define_mode_iterator V_FP_1REG
- [V64HF V64SF])
+ [V2HF V2SF
+ V4HF V4SF
+ V8HF V8SF
+ V16HF V16SF
+ V32HF V32SF
+ V64HF V64SF])
+
+; V64_* modes are for where more general support is unimplemented
+; (e.g. reductions)
+(define_mode_iterator V64_1REG
+ [V64QI V64HI V64SI V64HF V64SF])
+(define_mode_iterator V64_INT_1REG
+ [V64QI V64HI V64SI])
; Vector modes for two vector registers
(define_mode_iterator V_2REG
+ [V2DI V2DF
+ V4DI V4DF
+ V8DI V8DF
+ V16DI V16DF
+ V32DI V32DF
+ V64DI V64DF])
+
+(define_mode_iterator V64_2REG
[V64DI V64DF])
; Vector modes with native support
(define_mode_iterator V_noQI
- [V64HI V64HF V64SI V64SF V64DI V64DF])
+ [V2HI V2HF V2SI V2SF V2DI V2DF
+ V4HI V4HF V4SI V4SF V4DI V4DF
+ V8HI V8HF V8SI V8SF V8DI V8DF
+ V16HI V16HF V16SI V16SF V16DI V16DF
+ V32HI V32HF V32SI V32SF V32DI V32DF
+ V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_noHI
- [V64HF V64SI V64SF V64DI V64DF])
+ [V2HF V2SI V2SF V2DI V2DF
+ V4HF V4SI V4SF V4DI V4DF
+ V8HF V8SI V8SF V8DI V8DF
+ V16HF V16SI V16SF V16DI V16DF
+ V32HF V32SI V32SF V32DI V32DF
+ V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_INT_noQI
- [V64HI V64SI V64DI])
+ [V2HI V2SI V2DI
+ V4HI V4SI V4DI
+ V8HI V8SI V8DI
+ V16HI V16SI V16DI
+ V32HI V32SI V32DI
+ V64HI V64SI V64DI])
(define_mode_iterator V_INT_noHI
- [V64SI V64DI])
+ [V2SI V2DI
+ V4SI V4DI
+ V8SI V8DI
+ V16SI V16DI
+ V32SI V32DI
+ V64SI V64DI])
; All of above
(define_mode_iterator V_ALL
- [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
+ [V2QI V2HI V2HF V2SI V2SF V2DI V2DF
+ V4QI V4HI V4HF V4SI V4SF V4DI V4DF
+ V8QI V8HI V8HF V8SI V8SF V8DI V8DF
+ V16QI V16HI V16HF V16SI V16SF V16DI V16DF
+ V32QI V32HI V32HF V32SI V32SF V32DI V32DF
+ V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_ALL_ALT
- [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
+ [V2QI V2HI V2HF V2SI V2SF V2DI V2DF
+ V4QI V4HI V4HF V4SI V4SF V4DI V4DF
+ V8QI V8HI V8HF V8SI V8SF V8DI V8DF
+ V16QI V16HI V16HF V16SI V16SF V16DI V16DF
+ V32QI V32HI V32HF V32SI V32SF V32DI V32DF
+ V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_INT
- [V64QI V64HI V64SI V64DI])
+ [V2QI V2HI V2SI V2DI
+ V4QI V4HI V4SI V4DI
+ V8QI V8HI V8SI V8DI
+ V16QI V16HI V16SI V16DI
+ V32QI V32HI V32SI V32DI
+ V64QI V64HI V64SI V64DI])
(define_mode_iterator V_FP
+ [V2HF V2SF V2DF
+ V4HF V4SF V4DF
+ V8HF V8SF V8DF
+ V16HF V16SF V16DF
+ V32HF V32SF V32DF
+ V64HF V64SF V64DF])
+
+(define_mode_iterator V64_ALL
+ [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
+(define_mode_iterator V64_FP
[V64HF V64SF V64DF])
(define_mode_attr scalar_mode
- [(V64QI "qi") (V64HI "hi") (V64SI "si")
+ [(V2QI "qi") (V2HI "hi") (V2SI "si")
+ (V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df")
+ (V4QI "qi") (V4HI "hi") (V4SI "si")
+ (V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df")
+ (V8QI "qi") (V8HI "hi") (V8SI "si")
+ (V8HF "hf") (V8SF "sf") (V8DI "di") (V8DF "df")
+ (V16QI "qi") (V16HI "hi") (V16SI "si")
+ (V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df")
+ (V32QI "qi") (V32HI "hi") (V32SI "si")
+ (V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df")
+ (V64QI "qi") (V64HI "hi") (V64SI "si")
(V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
(define_mode_attr SCALAR_MODE
- [(V64QI "QI") (V64HI "HI") (V64SI "SI")
+ [(V2QI "QI") (V2HI "HI") (V2SI "SI")
+ (V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF")
+ (V4QI "QI") (V4HI "HI") (V4SI "SI")
+ (V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF")
+ (V8QI "QI") (V8HI "HI") (V8SI "SI")
+ (V8HF "HF") (V8SF "SF") (V8DI "DI") (V8DF "DF")
+ (V16QI "QI") (V16HI "HI") (V16SI "SI")
+ (V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF")
+ (V32QI "QI") (V32HI "HI") (V32SI "SI")
+ (V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF")
+ (V64QI "QI") (V64HI "HI") (V64SI "SI")
(V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
(define_mode_attr vnsi
- [(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
+ [(V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si")
+ (V2SF "v2si") (V2DI "v2si") (V2DF "v2si")
+ (V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si")
+ (V4SF "v4si") (V4DI "v4si") (V4DF "v4si")
+ (V8QI "v8si") (V8HI "v8si") (V8HF "v8si") (V8SI "v8si")
+ (V8SF "v8si") (V8DI "v8si") (V8DF "v8si")
+ (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si")
+ (V16SF "v16si") (V16DI "v16si") (V16DF "v16si")
+ (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si")
+ (V32SF "v32si") (V32DI "v32si") (V32DF "v32si")
+ (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
(V64SF "v64si") (V64DI "v64si") (V64DF "v64si")])
(define_mode_attr VnSI
- [(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
+ [(V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI")
+ (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI")
+ (V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI")
+ (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI")
+ (V8QI "V8SI") (V8HI "V8SI") (V8HF "V8SI") (V8SI "V8SI")
+ (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI")
+ (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI")
+ (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI")
+ (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI")
+ (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI")
+ (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
(V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")])
(define_mode_attr vndi
- [(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
+ [(V2QI "v2di") (V2HI "v2di") (V2HF "v2di") (V2SI "v2di")
+ (V2SF "v2di") (V2DI "v2di") (V2DF "v2di")
+ (V4QI "v4di") (V4HI "v4di") (V4HF "v4di") (V4SI "v4di")
+ (V4SF "v4di") (V4DI "v4di") (V4DF "v4di")
+ (V8QI "v8di") (V8HI "v8di") (V8HF "v8di") (V8SI "v8di")
+ (V8SF "v8di") (V8DI "v8di") (V8DF "v8di")
+ (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di")
+ (V16SF "v16di") (V16DI "v16di") (V16DF "v16di")
+ (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di")
+ (V32SF "v32di") (V32DI "v32di") (V32DF "v32di")
+ (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
(V64SF "v64di") (V64DI "v64di") (V64DF "v64di")])
(define_mode_attr VnDI
- [(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
+ [(V2QI "V2DI") (V2HI "V2DI") (V2HF "V2DI") (V2SI "V2DI")
+ (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI")
+ (V4QI "V4DI") (V4HI "V4DI") (V4HF "V4DI") (V4SI "V4DI")
+ (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI")
+ (V8QI "V8DI") (V8HI "V8DI") (V8HF "V8DI") (V8SI "V8DI")
+ (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI")
+ (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI")
+ (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI")
+ (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI")
+ (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI")
+ (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
(V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")])
-(define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
+(define_mode_attr sdwa
+ [(V2QI "BYTE_0") (V2HI "WORD_0") (V2SI "DWORD")
+ (V4QI "BYTE_0") (V4HI "WORD_0") (V4SI "DWORD")
+ (V8QI "BYTE_0") (V8HI "WORD_0") (V8SI "DWORD")
+ (V16QI "BYTE_0") (V16HI "WORD_0") (V16SI "DWORD")
+ (V32QI "BYTE_0") (V32HI "WORD_0") (V32SI "DWORD")
+ (V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
;; }}}
;; {{{ Substitutions
@@ -180,6 +335,37 @@
(match_operand:V_ALL 1 "general_operand"))]
""
{
+ /* Bitwise reinterpret casts via SUBREG don't work with GCN vector
+ registers, but we can convert the MEM to a mode that does work. */
+ if (MEM_P (operands[0]) && !SUBREG_P (operands[0])
+ && SUBREG_P (operands[1])
+ && GET_MODE_SIZE (GET_MODE (operands[1]))
+ == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))))
+ {
+ rtx src = SUBREG_REG (operands[1]);
+ rtx mem = copy_rtx (operands[0]);
+ PUT_MODE_RAW (mem, GET_MODE (src));
+ emit_move_insn (mem, src);
+ DONE;
+ }
+ if (MEM_P (operands[1]) && !SUBREG_P (operands[1])
+ && SUBREG_P (operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[0]))
+ == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0]))))
+ {
+ rtx dest = SUBREG_REG (operands[0]);
+ rtx mem = copy_rtx (operands[1]);
+ PUT_MODE_RAW (mem, GET_MODE (dest));
+ emit_move_insn (dest, mem);
+ DONE;
+ }
+
+ /* SUBREG of MEM is not supported. */
+ gcc_assert ((!SUBREG_P (operands[0])
+ || !MEM_P (SUBREG_REG (operands[0])))
+ && (!SUBREG_P (operands[1])
+ || !MEM_P (SUBREG_REG (operands[1]))));
+
if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
{
operands[1] = force_reg (<MODE>mode, operands[1]);
@@ -622,6 +808,40 @@
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
+(define_expand "vec_extract<V_ALL:mode><V_ALL_ALT:mode>"
+ [(set (match_operand:V_ALL_ALT 0 "register_operand")
+ (vec_select:V_ALL_ALT
+ (match_operand:V_ALL 1 "register_operand")
+ (parallel [(match_operand 2 "immediate_operand")])))]
+ "MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)
+ && <V_ALL_ALT:SCALAR_MODE>mode == <V_ALL:SCALAR_MODE>mode"
+ {
+ int numlanes = GET_MODE_NUNITS (<V_ALL_ALT:MODE>mode);
+ int firstlane = INTVAL (operands[2]) * numlanes;
+ rtx tmp;
+
+ if (firstlane == 0)
+ {
+ /* A plain move will do. */
+ tmp = operands[1];
+ } else {
+ /* FIXME: optimize this by using DPP where available. */
+
+ rtx permutation = gen_reg_rtx (<V_ALL:VnSI>mode);
+ emit_insn (gen_vec_series<V_ALL:vnsi> (permutation,
+ GEN_INT (firstlane*4),
+ GEN_INT (4)));
+
+ tmp = gen_reg_rtx (<V_ALL:MODE>mode);
+ emit_insn (gen_ds_bpermute<V_ALL:mode> (tmp, permutation, operands[1],
+ get_exec (<V_ALL:MODE>mode)));
+ }
+
+ emit_move_insn (operands[0],
+ gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0));
+ DONE;
+ })
+
(define_expand "extract_last_<mode>"
[(match_operand:<SCALAR_MODE> 0 "register_operand")
(match_operand:DI 1 "gcn_alu_operand")
@@ -673,6 +893,16 @@
DONE;
})
+(define_expand "vec_init<V_ALL:mode><V_ALL_ALT:mode>"
+ [(match_operand:V_ALL 0 "register_operand")
+ (match_operand:V_ALL_ALT 1)]
+ "<V_ALL:SCALAR_MODE>mode == <V_ALL_ALT:SCALAR_MODE>mode
+ && MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)"
+ {
+ gcn_expand_vector_init (operands[0], operands[1]);
+ DONE;
+ })
+
;; }}}
;; {{{ Scatter / Gather
@@ -2161,6 +2391,19 @@
(set_attr "length" "8,8")])
;; }}}
+;; {{{ Int unops
+
+(define_expand "neg<mode>2"
+ [(match_operand:V_INT 0 "register_operand")
+ (match_operand:V_INT 1 "register_operand")]
+ ""
+ {
+ emit_insn (gen_sub<mode>3 (operands[0], gcn_vec_constant (<MODE>mode, 0),
+ operands[1]));
+ DONE;
+ })
+
+;; }}}
;; {{{ FP binops - special cases
; GCN does not directly provide a DFmode subtract instruction, so we do it by
@@ -2419,10 +2662,10 @@
(set_attr "length" "8")])
(define_insn "ldexp<mode>3<exec>"
- [(set (match_operand:V_FP 0 "register_operand" "=v")
+ [(set (match_operand:V_FP 0 "register_operand" "= v")
(unspec:V_FP
- [(match_operand:V_FP 1 "gcn_alu_operand" "vB")
- (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")]
+ [(match_operand:V_FP 1 "gcn_alu_operand" " vB")
+ (match_operand:<VnSI> 2 "gcn_alu_operand" "vSvA")]
UNSPEC_LDEXP))]
""
"v_ldexp%i0\t%0, %1, %2"
@@ -2452,8 +2695,8 @@
(set_attr "length" "8")])
(define_insn "frexp<mode>_exp2<exec>"
- [(set (match_operand:V64SI 0 "register_operand" "=v")
- (unspec:V64SI
+ [(set (match_operand:<VnSI> 0 "register_operand" "=v")
+ (unspec:<VnSI>
[(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
UNSPEC_FREXP_EXP))]
""
@@ -2640,9 +2883,27 @@
(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
-(define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF])
-(define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF])
-(define_mode_iterator VCVT_IMODE [V64HI V64SI])
+(define_mode_iterator VCVT_MODE
+ [V2HI V2SI V2HF V2SF V2DF
+ V4HI V4SI V4HF V4SF V4DF
+ V8HI V8SI V8HF V8SF V8DF
+ V16HI V16SI V16HF V16SF V16DF
+ V32HI V32SI V32HF V32SF V32DF
+ V64HI V64SI V64HF V64SF V64DF])
+(define_mode_iterator VCVT_FMODE
+ [V2HF V2SF V2DF
+ V4HF V4SF V4DF
+ V8HF V8SF V8DF
+ V16HF V16SF V16DF
+ V32HF V32SF V32DF
+ V64HF V64SF V64DF])
+(define_mode_iterator VCVT_IMODE
+ [V2HI V2SI
+ V4HI V4SI
+ V8HI V8SI
+ V16HI V16SI
+ V32HI V32SI
+ V64HI V64SI])
(define_code_iterator cvt_op [fix unsigned_fix
float unsigned_float
@@ -2669,8 +2930,9 @@
[(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
(cvt_op:VCVT_FMODE
(match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
- "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
- <cvt_name>_cvt)"
+ "MODE_VF (<VCVT_MODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
+ && gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
+ <cvt_name>_cvt)"
"v_cvt<cvt_operands>\t%0, %1"
[(set_attr "type" "vop1")
(set_attr "length" "8")])
@@ -2679,8 +2941,9 @@
[(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
(cvt_op:VCVT_IMODE
(match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
- "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
- <cvt_name>_cvt)"
+ "MODE_VF (<VCVT_IMODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
+ && gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
+ <cvt_name>_cvt)"
"v_cvt<cvt_operands>\t%0, %1"
[(set_attr "type" "vop1")
(set_attr "length" "8")])
@@ -3265,7 +3528,7 @@
(define_expand "reduc_<reduc_op>_scal_<mode>"
[(set (match_operand:<SCALAR_MODE> 0 "register_operand")
(unspec:<SCALAR_MODE>
- [(match_operand:V_ALL 1 "register_operand")]
+ [(match_operand:V64_ALL 1 "register_operand")]
REDUC_UNSPEC))]
""
{
@@ -3284,7 +3547,7 @@
(define_expand "fold_left_plus_<mode>"
[(match_operand:<SCALAR_MODE> 0 "register_operand")
(match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
- (match_operand:V_FP 2 "gcn_alu_operand")]
+ (match_operand:V64_FP 2 "gcn_alu_operand")]
"can_create_pseudo_p ()
&& (flag_openacc || flag_openmp
|| flag_associative_math)"
@@ -3300,11 +3563,11 @@
})
(define_insn "*<reduc_op>_dpp_shr_<mode>"
- [(set (match_operand:V_1REG 0 "register_operand" "=v")
- (unspec:V_1REG
- [(match_operand:V_1REG 1 "register_operand" "v")
- (match_operand:V_1REG 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(set (match_operand:V64_1REG 0 "register_operand" "=v")
+ (unspec:V64_1REG
+ [(match_operand:V64_1REG 1 "register_operand" "v")
+ (match_operand:V64_1REG 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
REDUC_UNSPEC))]
; GCN3 requires a carry out, GCN5 not
"!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
@@ -3317,11 +3580,11 @@
(set_attr "length" "8")])
(define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
- [(set (match_operand:V_DI 0 "register_operand" "=v")
- (unspec:V_DI
- [(match_operand:V_DI 1 "register_operand" "v")
- (match_operand:V_DI 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(set (match_operand:V64_DI 0 "register_operand" "=v")
+ (unspec:V64_DI
+ [(match_operand:V64_DI 1 "register_operand" "v")
+ (match_operand:V64_DI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
REDUC_2REG_UNSPEC))]
""
"#"
@@ -3346,10 +3609,10 @@
; Special cases for addition.
(define_insn "*plus_carry_dpp_shr_<mode>"
- [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
- (unspec:V_INT_1REG
- [(match_operand:V_INT_1REG 1 "register_operand" "v")
- (match_operand:V_INT_1REG 2 "register_operand" "v")
+ [(set (match_operand:V64_INT_1REG 0 "register_operand" "=v")
+ (unspec:V64_INT_1REG
+ [(match_operand:V64_INT_1REG 1 "register_operand" "v")
+ (match_operand:V64_INT_1REG 2 "register_operand" "v")
(match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_PLUS_CARRY_DPP_SHR))
(clobber (reg:DI VCC_REG))]
@@ -3363,12 +3626,12 @@
(set_attr "length" "8")])
(define_insn "*plus_carry_in_dpp_shr_<mode>"
- [(set (match_operand:V_SI 0 "register_operand" "=v")
- (unspec:V_SI
- [(match_operand:V_SI 1 "register_operand" "v")
- (match_operand:V_SI 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")
- (match_operand:DI 4 "register_operand" "cV")]
+ [(set (match_operand:V64_SI 0 "register_operand" "=v")
+ (unspec:V64_SI
+ [(match_operand:V64_SI 1 "register_operand" "v")
+ (match_operand:V64_SI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")
+ (match_operand:DI 4 "register_operand" "cV")]
UNSPEC_PLUS_CARRY_IN_DPP_SHR))
(clobber (reg:DI VCC_REG))]
""
@@ -3381,11 +3644,11 @@
(set_attr "length" "8")])
(define_insn_and_split "*plus_carry_dpp_shr_<mode>"
- [(set (match_operand:V_DI 0 "register_operand" "=v")
- (unspec:V_DI
- [(match_operand:V_DI 1 "register_operand" "v")
- (match_operand:V_DI 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(set (match_operand:V64_DI 0 "register_operand" "=v")
+ (unspec:V64_DI
+ [(match_operand:V64_DI 1 "register_operand" "v")
+ (match_operand:V64_DI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_PLUS_CARRY_DPP_SHR))
(clobber (reg:DI VCC_REG))]
""
@@ -3416,7 +3679,7 @@
(define_insn "mov_from_lane63_<mode>"
[(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
(unspec:<SCALAR_MODE>
- [(match_operand:V_1REG 1 "register_operand" " v,v")]
+ [(match_operand:V64_1REG 1 "register_operand" " v,v")]
UNSPEC_MOV_FROM_LANE63))]
""
"@
@@ -3429,7 +3692,7 @@
(define_insn "mov_from_lane63_<mode>"
[(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
(unspec:<SCALAR_MODE>
- [(match_operand:V_2REG 1 "register_operand" " v,v")]
+ [(match_operand:V64_2REG 1 "register_operand" " v,v")]
UNSPEC_MOV_FROM_LANE63))]
""
"@
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index c27ee91..3dc294c 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -395,6 +395,97 @@ gcn_scalar_mode_supported_p (scalar_mode mode)
|| mode == TImode);
}
+/* Return a vector mode with N lanes of MODE. */
+
+static machine_mode
+VnMODE (int n, machine_mode mode)
+{
+ switch (mode)
+ {
+ case QImode:
+ switch (n)
+ {
+ case 2: return V2QImode;
+ case 4: return V4QImode;
+ case 8: return V8QImode;
+ case 16: return V16QImode;
+ case 32: return V32QImode;
+ case 64: return V64QImode;
+ }
+ break;
+ case HImode:
+ switch (n)
+ {
+ case 2: return V2HImode;
+ case 4: return V4HImode;
+ case 8: return V8HImode;
+ case 16: return V16HImode;
+ case 32: return V32HImode;
+ case 64: return V64HImode;
+ }
+ break;
+ case HFmode:
+ switch (n)
+ {
+ case 2: return V2HFmode;
+ case 4: return V4HFmode;
+ case 8: return V8HFmode;
+ case 16: return V16HFmode;
+ case 32: return V32HFmode;
+ case 64: return V64HFmode;
+ }
+ break;
+ case SImode:
+ switch (n)
+ {
+ case 2: return V2SImode;
+ case 4: return V4SImode;
+ case 8: return V8SImode;
+ case 16: return V16SImode;
+ case 32: return V32SImode;
+ case 64: return V64SImode;
+ }
+ break;
+ case SFmode:
+ switch (n)
+ {
+ case 2: return V2SFmode;
+ case 4: return V4SFmode;
+ case 8: return V8SFmode;
+ case 16: return V16SFmode;
+ case 32: return V32SFmode;
+ case 64: return V64SFmode;
+ }
+ break;
+ case DImode:
+ switch (n)
+ {
+ case 2: return V2DImode;
+ case 4: return V4DImode;
+ case 8: return V8DImode;
+ case 16: return V16DImode;
+ case 32: return V32DImode;
+ case 64: return V64DImode;
+ }
+ break;
+ case DFmode:
+ switch (n)
+ {
+ case 2: return V2DFmode;
+ case 4: return V4DFmode;
+ case 8: return V8DFmode;
+ case 16: return V16DFmode;
+ case 32: return V32DFmode;
+ case 64: return V64DFmode;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return VOIDmode;
+}
+
/* Implement TARGET_CLASS_MAX_NREGS.
Return the number of hard registers needed to hold a value of MODE in
@@ -556,6 +647,23 @@ gcn_can_change_mode_class (machine_mode from, machine_mode to,
{
if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to))
return true;
+
+ /* Vector conversions are only valid when changing mode with a fixed number
+ of lanes, or changing number of lanes with a fixed mode. Anything else
+ would require actual data movement. */
+ if (VECTOR_MODE_P (from) && VECTOR_MODE_P (to)
+ && GET_MODE_NUNITS (from) != GET_MODE_NUNITS (to)
+ && GET_MODE_INNER (from) != GET_MODE_INNER (to))
+ return false;
+
+ /* Vector/scalar conversions are only permitted when the scalar mode
+ is the same or smaller than the inner vector mode. */
+ if ((VECTOR_MODE_P (from) && !VECTOR_MODE_P (to)
+ && GET_MODE_SIZE (to) >= GET_MODE_SIZE (GET_MODE_INNER (from)))
+ || (VECTOR_MODE_P (to) && !VECTOR_MODE_P (from)
+ && GET_MODE_SIZE (from) >= GET_MODE_SIZE (GET_MODE_INNER (to))))
+ return false;
+
return (gcn_class_max_nregs (regclass, from)
== gcn_class_max_nregs (regclass, to));
}
@@ -595,6 +703,16 @@ gcn_class_likely_spilled_p (reg_class_t rclass)
bool
gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
+ if (VECTOR_MODE_P (mode1) || VECTOR_MODE_P (mode2))
+ {
+ int vf1 = (VECTOR_MODE_P (mode1) ? GET_MODE_NUNITS (mode1) : 1);
+ int vf2 = (VECTOR_MODE_P (mode2) ? GET_MODE_NUNITS (mode2) : 1);
+ machine_mode inner1 = (vf1 > 1 ? GET_MODE_INNER (mode1) : mode1);
+ machine_mode inner2 = (vf2 > 1 ? GET_MODE_INNER (mode2) : mode2);
+
+ return (vf1 == vf2 || (inner1 == inner2 && vf2 <= vf1));
+ }
+
return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
&& GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
}
@@ -616,14 +734,16 @@ gcn_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
rtx
gcn_operand_part (machine_mode mode, rtx op, int n)
{
- if (GET_MODE_SIZE (mode) >= 256)
+ int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1;
+
+ if (vf > 1)
{
- /*gcc_assert (GET_MODE_SIZE (mode) == 256 || n == 0); */
+ machine_mode vsimode = VnMODE (vf, SImode);
if (REG_P (op))
{
gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
- return gen_rtx_REG (V64SImode, REGNO (op) + n);
+ return gen_rtx_REG (vsimode, REGNO (op) + n);
}
if (GET_CODE (op) == CONST_VECTOR)
{
@@ -634,10 +754,10 @@ gcn_operand_part (machine_mode mode, rtx op, int n)
RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode),
CONST_VECTOR_ELT (op, i), n);
- return gen_rtx_CONST_VECTOR (V64SImode, v);
+ return gen_rtx_CONST_VECTOR (vsimode, v);
}
if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
- return gcn_gen_undef (V64SImode);
+ return gcn_gen_undef (vsimode);
gcc_unreachable ();
}
else if (GET_MODE_SIZE (mode) == 8 && REG_P (op))
@@ -726,7 +846,7 @@ gcn_ira_change_pseudo_allocno_class (int regno, reg_class_t cl,
/* Create a new DImode pseudo reg and emit an instruction to initialize
it to VAL. */
-static rtx
+rtx
get_exec (int64_t val)
{
rtx reg = gen_reg_rtx (DImode);
@@ -734,36 +854,11 @@ get_exec (int64_t val)
return reg;
}
-/* Return value of scalar exec register. */
-
rtx
-gcn_scalar_exec ()
+get_exec (machine_mode mode)
{
- return const1_rtx;
-}
-
-/* Return pseudo holding scalar exec register. */
-
-rtx
-gcn_scalar_exec_reg ()
-{
- return get_exec (1);
-}
-
-/* Return value of full exec register. */
-
-rtx
-gcn_full_exec ()
-{
- return constm1_rtx;
-}
-
-/* Return pseudo holding full exec register. */
-
-rtx
-gcn_full_exec_reg ()
-{
- return get_exec (-1);
+ int vf = (VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1);
+ return get_exec (0xffffffffffffffffUL >> (64-vf));
}
/* }}} */
@@ -802,8 +897,13 @@ int
gcn_inline_fp_constant_p (rtx x, bool allow_vector)
{
machine_mode mode = GET_MODE (x);
+ int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1;
+
+ if (vf > 1)
+ mode = GET_MODE_INNER (mode);
- if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
+ if (vf > 1
+ && (mode == HFmode || mode == SFmode || mode == DFmode)
&& allow_vector)
{
int n;
@@ -812,7 +912,7 @@ gcn_inline_fp_constant_p (rtx x, bool allow_vector)
n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
if (!n)
return 0;
- for (int i = 1; i < 64; i++)
+ for (int i = 1; i < vf; i++)
if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
return 0;
return 1;
@@ -867,8 +967,13 @@ bool
gcn_fp_constant_p (rtx x, bool allow_vector)
{
machine_mode mode = GET_MODE (x);
+ int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1;
- if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
+ if (vf > 1)
+ mode = GET_MODE_INNER (mode);
+
+ if (vf > 1
+ && (mode == HFmode || mode == SFmode || mode == DFmode)
&& allow_vector)
{
int n;
@@ -877,7 +982,7 @@ gcn_fp_constant_p (rtx x, bool allow_vector)
n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
if (!n)
return false;
- for (int i = 1; i < 64; i++)
+ for (int i = 1; i < vf; i++)
if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
return false;
return true;
@@ -1091,6 +1196,249 @@ gcn_gen_undef (machine_mode mode)
}
/* }}} */
+/* {{{ Utility functions. */
+
+/* Generalised accessor functions for instruction patterns.
+   The machine description '@' prefix does something similar, but as of
+ GCC 10 is incompatible with define_subst, and anyway it doesn't
+ auto-handle the exec feature.
+
+ Four macros are provided; each function only needs one:
+
+ GEN_VN - create accessor functions for all sizes of one mode
+ GEN_VNM - create accessor functions for all sizes of all modes
+ GEN_VN_NOEXEC - for insns without "_exec" variants
+ GEN_VNM_NOEXEC - likewise
+
+ E.g. add<mode>3
+     GEN_VNM (add, 3, A(rtx dest, rtx s1, rtx s2), A(dest, s1, s2))
+
+ gen_addvNsi3 (dst, a, b)
+ -> calls gen_addv64si3, or gen_addv32si3, etc.
+
+ gen_addvNm3 (dst, a, b)
+ -> calls gen_addv64qi3, or gen_addv2di3, etc.
+
+ The mode is determined from the first parameter, which must be called
+ "dest" (or else the macro doesn't work).
+
+ Each function has two optional parameters at the end: merge_src and exec.
+ If exec is non-null, the function will call the "_exec" variant of the
+ insn. If exec is non-null but merge_src is null then an undef unspec
+ will be created.
+
+ E.g. cont.
+ gen_addvNsi3 (v64sidst, a, b, oldval, exec)
+ -> calls gen_addv64si3_exec (v64sidst, a, b, oldval, exec)
+
+ gen_addvNm3 (v2qidst, a, b, NULL, exec)
+ -> calls gen_addv2qi3_exec (v2qidst, a, b,
+ gcn_gen_undef (V2QImode), exec)
+ */
+
+#define A(...) __VA_ARGS__
+#define GEN_VN_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \
+static rtx \
+gen_##PREFIX##vN##SUFFIX (PARAMS) \
+{ \
+ machine_mode mode = GET_MODE (dest); \
+ int n = GET_MODE_NUNITS (mode); \
+ \
+ switch (n) \
+ { \
+ case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \
+ case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \
+ case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \
+ case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \
+ case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \
+ case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \
+ } \
+ \
+ gcc_unreachable (); \
+ return NULL_RTX; \
+}
+
+#define GEN_VNM_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \
+GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN_NOEXEC (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN_NOEXEC (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
+static rtx \
+gen_##PREFIX##vNm##SUFFIX (PARAMS) \
+{ \
+ machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \
+ \
+ switch (mode) \
+ { \
+ case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS); \
+ case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS); \
+ case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS); \
+ case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS); \
+ case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS); \
+ case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS); \
+ case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS); \
+ default: \
+ break; \
+ } \
+ \
+ gcc_unreachable (); \
+ return NULL_RTX; \
+}
+
+#define GEN_VN(PREFIX, SUFFIX, PARAMS, ARGS) \
+static rtx \
+gen_##PREFIX##vN##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
+{ \
+ machine_mode mode = GET_MODE (dest); \
+ int n = GET_MODE_NUNITS (mode); \
+ \
+ if (exec && !merge_src) \
+ merge_src = gcn_gen_undef (mode); \
+ \
+ if (exec) \
+ switch (n) \
+ { \
+ case 2: return gen_##PREFIX##v2##SUFFIX##_exec (ARGS, merge_src, exec); \
+ case 4: return gen_##PREFIX##v4##SUFFIX##_exec (ARGS, merge_src, exec); \
+ case 8: return gen_##PREFIX##v8##SUFFIX##_exec (ARGS, merge_src, exec); \
+ case 16: return gen_##PREFIX##v16##SUFFIX##_exec (ARGS, merge_src, exec); \
+ case 32: return gen_##PREFIX##v32##SUFFIX##_exec (ARGS, merge_src, exec); \
+ case 64: return gen_##PREFIX##v64##SUFFIX##_exec (ARGS, merge_src, exec); \
+ } \
+ else \
+ switch (n) \
+ { \
+ case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \
+ case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \
+ case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \
+ case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \
+ case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \
+ case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \
+ } \
+ \
+ gcc_unreachable (); \
+ return NULL_RTX; \
+}
+
+#define GEN_VNM(PREFIX, SUFFIX, PARAMS, ARGS) \
+GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
+static rtx \
+gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
+{ \
+ machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \
+ \
+ switch (mode) \
+ { \
+ case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec); \
+ case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec); \
+ case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec); \
+ case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \
+ case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \
+ case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \
+ case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \
+ default: \
+ break; \
+ } \
+ \
+ gcc_unreachable (); \
+ return NULL_RTX; \
+}
+
+GEN_VNM (add,3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VN (add,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VN (add,si3_vcc_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc),
+ A(dest, src1, src2, vcc))
+GEN_VN (add,di3_sext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VN (add,di3_vcc_zext_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc),
+ A(dest, src1, src2, vcc))
+GEN_VN (add,di3_zext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VN (add,di3_vcc_zext_dup2, A(rtx dest, rtx src1, rtx src2, rtx vcc),
+ A(dest, src1, src2, vcc))
+GEN_VN (addc,si3, A(rtx dest, rtx src1, rtx src2, rtx vccout, rtx vccin),
+ A(dest, src1, src2, vccout, vccin))
+GEN_VN (and,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VN (ashl,si3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift))
+GEN_VNM_NOEXEC (ds_bpermute,, A(rtx dest, rtx addr, rtx src, rtx exec),
+ A(dest, addr, src, exec))
+GEN_VNM (gather,_expr, A(rtx dest, rtx addr, rtx as, rtx vol),
+ A(dest, addr, as, vol))
+GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src))
+GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VN (sub,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
+GEN_VN_NOEXEC (vec_series,si, A(rtx dest, rtx x, rtx c), A(dest, x, c))
+
+#undef GEN_VNM
+#undef GEN_VN
+#undef GET_VN_FN
+#undef A
+
+/* Get icode for vector instructions without an optab. */
+
+#define CODE_FOR(PREFIX, SUFFIX) \
+static int \
+get_code_for_##PREFIX##vN##SUFFIX (int nunits) \
+{ \
+ switch (nunits) \
+ { \
+ case 2: return CODE_FOR_##PREFIX##v2##SUFFIX; \
+ case 4: return CODE_FOR_##PREFIX##v4##SUFFIX; \
+ case 8: return CODE_FOR_##PREFIX##v8##SUFFIX; \
+ case 16: return CODE_FOR_##PREFIX##v16##SUFFIX; \
+ case 32: return CODE_FOR_##PREFIX##v32##SUFFIX; \
+ case 64: return CODE_FOR_##PREFIX##v64##SUFFIX; \
+ } \
+ \
+ gcc_unreachable (); \
+ return CODE_FOR_nothing; \
+}
+
+#define CODE_FOR_OP(PREFIX) \
+ CODE_FOR (PREFIX, qi) \
+ CODE_FOR (PREFIX, hi) \
+ CODE_FOR (PREFIX, hf) \
+ CODE_FOR (PREFIX, si) \
+ CODE_FOR (PREFIX, sf) \
+ CODE_FOR (PREFIX, di) \
+ CODE_FOR (PREFIX, df) \
+static int \
+get_code_for_##PREFIX (machine_mode mode) \
+{ \
+ int vf = GET_MODE_NUNITS (mode); \
+ machine_mode smode = GET_MODE_INNER (mode); \
+ \
+ switch (smode) \
+ { \
+ case E_QImode: return get_code_for_##PREFIX##vNqi (vf); \
+ case E_HImode: return get_code_for_##PREFIX##vNhi (vf); \
+ case E_HFmode: return get_code_for_##PREFIX##vNhf (vf); \
+ case E_SImode: return get_code_for_##PREFIX##vNsi (vf); \
+ case E_SFmode: return get_code_for_##PREFIX##vNsf (vf); \
+ case E_DImode: return get_code_for_##PREFIX##vNdi (vf); \
+ case E_DFmode: return get_code_for_##PREFIX##vNdf (vf); \
+ default: break; \
+ } \
+ \
+ gcc_unreachable (); \
+ return CODE_FOR_nothing; \
+}
+
+CODE_FOR_OP (reload_in)
+CODE_FOR_OP (reload_out)
+
+#undef CODE_FOR_OP
+#undef CODE_FOR
+
+/* }}} */
/* {{{ Addresses, pointers and moves. */
/* Return true is REG is a valid place to store a pointer,
@@ -1644,103 +1992,152 @@ regno_ok_for_index_p (int regno)
return regno == M0_REG || VGPR_REGNO_P (regno);
}
-/* Generate move which uses the exec flags. If EXEC is NULL, then it is
- assumed that all lanes normally relevant to the mode of the move are
- affected. If PREV is NULL, then a sensible default is supplied for
- the inactive lanes. */
+/* Expand vector init of OP0 by VEC.
+ Implements vec_init instruction pattern. */
-static rtx
-gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL, rtx prev = NULL)
+void
+gcn_expand_vector_init (rtx op0, rtx vec)
{
+ rtx val[64];
machine_mode mode = GET_MODE (op0);
+ int vf = GET_MODE_NUNITS (mode);
+ machine_mode addrmode = VnMODE (vf, DImode);
+ machine_mode offsetmode = VnMODE (vf, SImode);
- if (vgpr_vector_mode_p (mode))
- {
- if (exec && exec != CONSTM1_RTX (DImode))
- {
- if (!prev)
- prev = op0;
- }
- else
- {
- if (!prev)
- prev = gcn_gen_undef (mode);
- exec = gcn_full_exec_reg ();
- }
+ int64_t mem_mask = 0;
+ int64_t item_mask[64];
+ rtx ramp = gen_reg_rtx (offsetmode);
+ rtx addr = gen_reg_rtx (addrmode);
- rtx set = gen_rtx_SET (op0, gen_rtx_VEC_MERGE (mode, op1, prev, exec));
+ int unit_size = GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op0)));
+ emit_insn (gen_mulvNsi3_dup (ramp, gen_rtx_REG (offsetmode, VGPR_REGNO (1)),
+ GEN_INT (unit_size)));
- return gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (2, set,
- gen_rtx_CLOBBER (VOIDmode,
- gen_rtx_SCRATCH (V64DImode))));
- }
+ bool simple_repeat = true;
- return (gen_rtx_PARALLEL
- (VOIDmode,
- gen_rtvec (2, gen_rtx_SET (op0, op1),
- gen_rtx_USE (VOIDmode,
- exec ? exec : gcn_scalar_exec ()))));
-}
+ /* Expand nested vectors into one vector. */
+ int item_count = XVECLEN (vec, 0);
+ for (int i = 0, j = 0; i < item_count; i++)
+ {
+ rtx item = XVECEXP (vec, 0, i);
+ machine_mode mode = GET_MODE (item);
+ int units = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1;
+ item_mask[j] = (((uint64_t)-1)>>(64-units)) << j;
-/* Generate masked move. */
+ if (simple_repeat && i != 0)
+ simple_repeat = item == XVECEXP (vec, 0, i-1);
-static rtx
-gen_duplicate_load (rtx op0, rtx op1, rtx op2 = NULL, rtx exec = NULL)
-{
- if (exec)
- return (gen_rtx_SET (op0,
- gen_rtx_VEC_MERGE (GET_MODE (op0),
- gen_rtx_VEC_DUPLICATE (GET_MODE
- (op0), op1),
- op2, exec)));
- else
- return (gen_rtx_SET (op0, gen_rtx_VEC_DUPLICATE (GET_MODE (op0), op1)));
-}
+      /* If it's a vector of values then copy them into the final location.  */
+ if (GET_CODE (item) == CONST_VECTOR)
+ {
+ for (int k = 0; k < units; k++)
+ val[j++] = XVECEXP (item, 0, k);
+ continue;
+ }
+ /* Otherwise, we have a scalar or an expression that expands... */
-/* Expand vector init of OP0 by VEC.
- Implements vec_init instruction pattern. */
+ if (MEM_P (item))
+ {
+ rtx base = XEXP (item, 0);
+ if (MEM_ADDR_SPACE (item) == DEFAULT_ADDR_SPACE
+ && REG_P (base))
+ {
+ /* We have a simple vector load. We can put the addresses in
+ the vector, combine it with any other such MEMs, and load it
+ all with a single gather at the end. */
+ int64_t mask = ((0xffffffffffffffffUL
+ >> (64-GET_MODE_NUNITS (mode)))
+ << j);
+ rtx exec = get_exec (mask);
+ emit_insn (gen_subvNsi3
+ (ramp, ramp,
+ gcn_vec_constant (offsetmode, j*unit_size),
+ ramp, exec));
+ emit_insn (gen_addvNdi3_zext_dup2
+ (addr, ramp, base,
+ (mem_mask ? addr : gcn_gen_undef (addrmode)),
+ exec));
+ mem_mask |= mask;
+ }
+ else
+ /* The MEM is non-trivial, so let's load it independently. */
+ item = force_reg (mode, item);
+ }
+ else if (!CONST_INT_P (item) && !CONST_DOUBLE_P (item))
+ /* The item may be a symbol_ref, or something else non-trivial. */
+ item = force_reg (mode, item);
+
+ /* Duplicate the vector across each item.
+ It is either a smaller vector register that needs shifting,
+ or a MEM that needs loading. */
+ val[j] = item;
+ j += units;
+ }
-void
-gcn_expand_vector_init (rtx op0, rtx vec)
-{
int64_t initialized_mask = 0;
- int64_t curr_mask = 1;
- machine_mode mode = GET_MODE (op0);
+ rtx prev = NULL;
- rtx val = XVECEXP (vec, 0, 0);
-
- for (int i = 1; i < 64; i++)
- if (rtx_equal_p (val, XVECEXP (vec, 0, i)))
- curr_mask |= (int64_t) 1 << i;
+ if (mem_mask)
+ {
+ emit_insn (gen_gathervNm_expr
+ (op0, gen_rtx_PLUS (addrmode, addr,
+ gen_rtx_VEC_DUPLICATE (addrmode,
+ const0_rtx)),
+ GEN_INT (DEFAULT_ADDR_SPACE), GEN_INT (0),
+ NULL, get_exec (mem_mask)));
+ prev = op0;
+ initialized_mask = mem_mask;
+ }
- if (gcn_constant_p (val))
- emit_move_insn (op0, gcn_vec_constant (mode, val));
- else
+ if (simple_repeat && item_count > 1 && !prev)
{
- val = force_reg (GET_MODE_INNER (mode), val);
- emit_insn (gen_duplicate_load (op0, val));
+ /* Special case for instances of {A, B, A, B, A, B, ....}, etc. */
+ rtx src = gen_rtx_SUBREG (mode, val[0], 0);
+ rtx input_vf_mask = GEN_INT (GET_MODE_NUNITS (GET_MODE (val[0]))-1);
+
+ rtx permutation = gen_reg_rtx (VnMODE (vf, SImode));
+ emit_insn (gen_vec_seriesvNsi (permutation, GEN_INT (0), GEN_INT (1)));
+ rtx mask_dup = gen_reg_rtx (VnMODE (vf, SImode));
+ emit_insn (gen_vec_duplicatevNsi (mask_dup, input_vf_mask));
+ emit_insn (gen_andvNsi3 (permutation, permutation, mask_dup));
+ emit_insn (gen_ashlvNsi3 (permutation, permutation, GEN_INT (2)));
+ emit_insn (gen_ds_bpermutevNm (op0, permutation, src, get_exec (mode)));
+ return;
}
- initialized_mask |= curr_mask;
- for (int i = 1; i < 64; i++)
+
+ /* Write each value, elementwise, but coalesce matching values into one
+ instruction, where possible. */
+ for (int i = 0; i < vf; i++)
if (!(initialized_mask & ((int64_t) 1 << i)))
{
- curr_mask = (int64_t) 1 << i;
- rtx val = XVECEXP (vec, 0, i);
-
- for (int j = i + 1; j < 64; j++)
- if (rtx_equal_p (val, XVECEXP (vec, 0, j)))
- curr_mask |= (int64_t) 1 << j;
- if (gcn_constant_p (val))
- emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val),
- get_exec (curr_mask)));
+ if (gcn_constant_p (val[i]))
+ emit_insn (gen_movvNm (op0, gcn_vec_constant (mode, val[i]), prev,
+ get_exec (item_mask[i])));
+ else if (VECTOR_MODE_P (GET_MODE (val[i]))
+ && (GET_MODE_NUNITS (GET_MODE (val[i])) == vf
+ || i == 0))
+ emit_insn (gen_movvNm (op0, gen_rtx_SUBREG (mode, val[i], 0), prev,
+ get_exec (item_mask[i])));
+ else if (VECTOR_MODE_P (GET_MODE (val[i])))
+ {
+ rtx permutation = gen_reg_rtx (VnMODE (vf, SImode));
+ emit_insn (gen_vec_seriesvNsi (permutation, GEN_INT (-i*4),
+ GEN_INT (4)));
+ rtx tmp = gen_reg_rtx (mode);
+ emit_insn (gen_ds_bpermutevNm (tmp, permutation,
+ gen_rtx_SUBREG (mode, val[i], 0),
+ get_exec (-1)));
+ emit_insn (gen_movvNm (op0, tmp, prev, get_exec (item_mask[i])));
+ }
else
{
- val = force_reg (GET_MODE_INNER (mode), val);
- emit_insn (gen_duplicate_load (op0, val, op0,
- get_exec (curr_mask)));
+ rtx reg = force_reg (GET_MODE_INNER (mode), val[i]);
+ emit_insn (gen_vec_duplicatevNm (op0, reg, prev,
+ get_exec (item_mask[i])));
}
- initialized_mask |= curr_mask;
+
+ initialized_mask |= item_mask[i];
+ prev = op0;
}
}
@@ -1751,18 +2148,18 @@ strided_constant (machine_mode mode, int base, int val)
{
rtx x = gen_reg_rtx (mode);
emit_move_insn (x, gcn_vec_constant (mode, base));
- emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 32),
- x, get_exec (0xffffffff00000000)));
- emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 16),
- x, get_exec (0xffff0000ffff0000)));
- emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 8),
- x, get_exec (0xff00ff00ff00ff00)));
- emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 4),
- x, get_exec (0xf0f0f0f0f0f0f0f0)));
- emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 2),
- x, get_exec (0xcccccccccccccccc)));
- emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 1),
- x, get_exec (0xaaaaaaaaaaaaaaaa)));
+ emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 32),
+ x, get_exec (0xffffffff00000000)));
+ emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 16),
+ x, get_exec (0xffff0000ffff0000)));
+ emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 8),
+ x, get_exec (0xff00ff00ff00ff00)));
+ emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 4),
+ x, get_exec (0xf0f0f0f0f0f0f0f0)));
+ emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 2),
+ x, get_exec (0xcccccccccccccccc)));
+ emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 1),
+ x, get_exec (0xaaaaaaaaaaaaaaaa)));
return x;
}
@@ -1792,15 +2189,17 @@ gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode,
case ADDR_SPACE_LDS:
case ADDR_SPACE_GDS:
/* FIXME: LDS support offsets, handle them!. */
- if (vgpr_vector_mode_p (mode) && GET_MODE (x) != V64SImode)
+ if (vgpr_vector_mode_p (mode)
+ && GET_MODE_INNER (GET_MODE (x)) != SImode)
{
- rtx addrs = gen_reg_rtx (V64SImode);
+ machine_mode simode = VnMODE (GET_MODE_NUNITS (mode), SImode);
+ rtx addrs = gen_reg_rtx (simode);
rtx base = force_reg (SImode, x);
- rtx offsets = strided_constant (V64SImode, 0,
+ rtx offsets = strided_constant (simode, 0,
GET_MODE_UNIT_SIZE (mode));
- emit_insn (gen_vec_duplicatev64si (addrs, base));
- emit_insn (gen_addv64si3 (addrs, offsets, addrs));
+ emit_insn (gen_vec_duplicatevNsi (addrs, base));
+ emit_insn (gen_addvNsi3 (addrs, offsets, addrs));
return addrs;
}
return x;
@@ -1808,16 +2207,18 @@ gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode,
gcc_unreachable ();
}
-/* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:V64DI)) with the
+/* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:VnDI)) with the
proper vector of stepped addresses.
MEM will be a DImode address of a vector in an SGPR.
- TMP will be a V64DImode VGPR pair or (scratch:V64DI). */
+ TMP will be a VnDImode VGPR pair or (scratch:VnDI). */
rtx
gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
rtx tmp)
{
+ machine_mode pmode = VnMODE (GET_MODE_NUNITS (mode), DImode);
+ machine_mode offmode = VnMODE (GET_MODE_NUNITS (mode), SImode);
gcc_assert (MEM_P (mem));
rtx mem_base = XEXP (mem, 0);
rtx mem_index = NULL_RTX;
@@ -1841,22 +2242,18 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
machine_mode inner = GET_MODE_INNER (mode);
int shift = exact_log2 (GET_MODE_SIZE (inner));
- rtx ramp = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
- rtx undef_v64si = gcn_gen_undef (V64SImode);
+ rtx ramp = gen_rtx_REG (offmode, VGPR_REGNO (1));
rtx new_base = NULL_RTX;
addr_space_t as = MEM_ADDR_SPACE (mem);
rtx tmplo = (REG_P (tmp)
- ? gcn_operand_part (V64DImode, tmp, 0)
- : gen_reg_rtx (V64SImode));
+ ? gcn_operand_part (pmode, tmp, 0)
+ : gen_reg_rtx (offmode));
/* tmplo[:] = ramp[:] << shift */
- if (exec)
- emit_insn (gen_ashlv64si3_exec (tmplo, ramp,
- gen_int_mode (shift, SImode),
- undef_v64si, exec));
- else
- emit_insn (gen_ashlv64si3 (tmplo, ramp, gen_int_mode (shift, SImode)));
+ emit_insn (gen_ashlvNsi3 (tmplo, ramp,
+ gen_int_mode (shift, SImode),
+ NULL, exec));
if (AS_FLAT_P (as))
{
@@ -1866,53 +2263,41 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
{
rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0);
rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1);
- rtx tmphi = gcn_operand_part (V64DImode, tmp, 1);
+ rtx tmphi = gcn_operand_part (pmode, tmp, 1);
/* tmphi[:] = mem_base_hi */
- if (exec)
- emit_insn (gen_vec_duplicatev64si_exec (tmphi, mem_base_hi,
- undef_v64si, exec));
- else
- emit_insn (gen_vec_duplicatev64si (tmphi, mem_base_hi));
+ emit_insn (gen_vec_duplicatevNsi (tmphi, mem_base_hi, NULL, exec));
/* tmp[:] += zext (mem_base) */
if (exec)
{
- emit_insn (gen_addv64si3_vcc_dup_exec (tmplo, mem_base_lo, tmplo,
- vcc, undef_v64si, exec));
- emit_insn (gen_addcv64si3_exec (tmphi, tmphi, const0_rtx,
- vcc, vcc, undef_v64si, exec));
+ emit_insn (gen_addvNsi3_vcc_dup (tmplo, mem_base_lo, tmplo,
+ vcc, NULL, exec));
+ emit_insn (gen_addcvNsi3 (tmphi, tmphi, const0_rtx,
+ vcc, vcc, NULL, exec));
}
else
- emit_insn (gen_addv64di3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc));
+ emit_insn (gen_addvNdi3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc));
}
else
{
- tmp = gen_reg_rtx (V64DImode);
- if (exec)
- emit_insn (gen_addv64di3_vcc_zext_dup2_exec
- (tmp, tmplo, mem_base, vcc, gcn_gen_undef (V64DImode),
- exec));
- else
- emit_insn (gen_addv64di3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc));
+ tmp = gen_reg_rtx (pmode);
+ emit_insn (gen_addvNdi3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc,
+ NULL, exec));
}
new_base = tmp;
}
else if (AS_ANY_DS_P (as))
{
- if (!exec)
- emit_insn (gen_addv64si3_dup (tmplo, tmplo, mem_base));
- else
- emit_insn (gen_addv64si3_dup_exec (tmplo, tmplo, mem_base,
- gcn_gen_undef (V64SImode), exec));
+ emit_insn (gen_addvNsi3_dup (tmplo, tmplo, mem_base, NULL, exec));
new_base = tmplo;
}
else
{
- mem_base = gen_rtx_VEC_DUPLICATE (V64DImode, mem_base);
- new_base = gen_rtx_PLUS (V64DImode, mem_base,
- gen_rtx_SIGN_EXTEND (V64DImode, tmplo));
+ mem_base = gen_rtx_VEC_DUPLICATE (pmode, mem_base);
+ new_base = gen_rtx_PLUS (pmode, mem_base,
+ gen_rtx_SIGN_EXTEND (pmode, tmplo));
}
return gen_rtx_PLUS (GET_MODE (new_base), new_base,
@@ -1929,42 +2314,33 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
If EXEC is set then _exec patterns will be used, otherwise plain.
Return values.
- ADDR_SPACE_FLAT - return V64DImode vector of absolute addresses.
- ADDR_SPACE_GLOBAL - return V64SImode vector of offsets. */
+ ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses.
+ ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. */
rtx
gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
bool unsigned_p, rtx exec)
{
- rtx tmpsi = gen_reg_rtx (V64SImode);
- rtx tmpdi = gen_reg_rtx (V64DImode);
- rtx undefsi = exec ? gcn_gen_undef (V64SImode) : NULL;
- rtx undefdi = exec ? gcn_gen_undef (V64DImode) : NULL;
+ int vf = GET_MODE_NUNITS (GET_MODE (offsets));
+ rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode));
+ rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode));
if (CONST_INT_P (scale)
&& INTVAL (scale) > 0
&& exact_log2 (INTVAL (scale)) >= 0)
- emit_insn (gen_ashlv64si3 (tmpsi, offsets,
- GEN_INT (exact_log2 (INTVAL (scale)))));
+ emit_insn (gen_ashlvNsi3 (tmpsi, offsets,
+ GEN_INT (exact_log2 (INTVAL (scale))),
+ NULL, exec));
else
- (exec
- ? emit_insn (gen_mulv64si3_dup_exec (tmpsi, offsets, scale, undefsi,
- exec))
- : emit_insn (gen_mulv64si3_dup (tmpsi, offsets, scale)));
+ emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec));
/* "Global" instructions do not support negative register offsets. */
if (as == ADDR_SPACE_FLAT || !unsigned_p)
{
if (unsigned_p)
- (exec
- ? emit_insn (gen_addv64di3_zext_dup2_exec (tmpdi, tmpsi, base,
- undefdi, exec))
- : emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base)));
+ emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec));
else
- (exec
- ? emit_insn (gen_addv64di3_sext_dup2_exec (tmpdi, tmpsi, base,
- undefdi, exec))
- : emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base)));
+ emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec));
return tmpdi;
}
else if (as == ADDR_SPACE_GLOBAL)
@@ -2065,59 +2441,9 @@ gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
|| GET_MODE_CLASS (reload_mode) == MODE_VECTOR_FLOAT)
{
if (in_p)
- switch (reload_mode)
- {
- case E_V64SImode:
- sri->icode = CODE_FOR_reload_inv64si;
- break;
- case E_V64SFmode:
- sri->icode = CODE_FOR_reload_inv64sf;
- break;
- case E_V64HImode:
- sri->icode = CODE_FOR_reload_inv64hi;
- break;
- case E_V64HFmode:
- sri->icode = CODE_FOR_reload_inv64hf;
- break;
- case E_V64QImode:
- sri->icode = CODE_FOR_reload_inv64qi;
- break;
- case E_V64DImode:
- sri->icode = CODE_FOR_reload_inv64di;
- break;
- case E_V64DFmode:
- sri->icode = CODE_FOR_reload_inv64df;
- break;
- default:
- gcc_unreachable ();
- }
+ sri->icode = get_code_for_reload_in (reload_mode);
else
- switch (reload_mode)
- {
- case E_V64SImode:
- sri->icode = CODE_FOR_reload_outv64si;
- break;
- case E_V64SFmode:
- sri->icode = CODE_FOR_reload_outv64sf;
- break;
- case E_V64HImode:
- sri->icode = CODE_FOR_reload_outv64hi;
- break;
- case E_V64HFmode:
- sri->icode = CODE_FOR_reload_outv64hf;
- break;
- case E_V64QImode:
- sri->icode = CODE_FOR_reload_outv64qi;
- break;
- case E_V64DImode:
- sri->icode = CODE_FOR_reload_outv64di;
- break;
- case E_V64DFmode:
- sri->icode = CODE_FOR_reload_outv64df;
- break;
- default:
- gcc_unreachable ();
- }
+ sri->icode = get_code_for_reload_out (reload_mode);
break;
}
/* Fallthrough. */
@@ -3428,6 +3754,9 @@ gcn_valid_cvt_p (machine_mode from, machine_mode to, enum gcn_cvt_t op)
if (VECTOR_MODE_P (from))
{
+ if (GET_MODE_NUNITS (from) != GET_MODE_NUNITS (to))
+ return false;
+
from = GET_MODE_INNER (from);
to = GET_MODE_INNER (to);
}
@@ -3926,7 +4255,7 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
rtx mem = gen_rtx_MEM (GET_MODE (target), addrs);
/*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
/* FIXME: set attributes. */
- emit_insn (gen_mov_with_exec (target, mem, exec));
+ emit_insn (gen_movvNm (target, mem, NULL, exec));
return target;
}
case GCN_BUILTIN_FLAT_STORE_PTR_INT32:
@@ -3961,20 +4290,18 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
rtx mem = gen_rtx_MEM (vmode, addrs);
/*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
/* FIXME: set attributes. */
- emit_insn (gen_mov_with_exec (mem, val, exec));
+ emit_insn (gen_movvNm (mem, val, NULL, exec));
return target;
}
case GCN_BUILTIN_SQRTVF:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg = force_reg (V64SFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64SFmode,
EXPAND_NORMAL));
- emit_insn (gen_sqrtv64sf2_exec
- (target, arg, gcn_gen_undef (V64SFmode), exec));
+ emit_insn (gen_sqrtv64sf2 (target, arg));
return target;
}
case GCN_BUILTIN_SQRTF:
@@ -3992,20 +4319,17 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg = force_reg (V64SFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64SFmode,
EXPAND_NORMAL));
- emit_insn (gen_absv64sf2_exec
- (target, arg, gcn_gen_undef (V64SFmode), exec));
+ emit_insn (gen_absv64sf2 (target, arg));
return target;
}
case GCN_BUILTIN_LDEXPVF:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg1 = force_reg (V64SFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64SFmode,
@@ -4014,15 +4338,13 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
V64SImode,
EXPAND_NORMAL));
- emit_insn (gen_ldexpv64sf3_exec
- (target, arg1, arg2, gcn_gen_undef (V64SFmode), exec));
+ emit_insn (gen_ldexpv64sf3 (target, arg1, arg2));
return target;
}
case GCN_BUILTIN_LDEXPV:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg1 = force_reg (V64DFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64SFmode,
@@ -4031,60 +4353,51 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
V64SImode,
EXPAND_NORMAL));
- emit_insn (gen_ldexpv64df3_exec
- (target, arg1, arg2, gcn_gen_undef (V64DFmode), exec));
+ emit_insn (gen_ldexpv64df3 (target, arg1, arg2));
return target;
}
case GCN_BUILTIN_FREXPVF_EXP:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg = force_reg (V64SFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64SFmode,
EXPAND_NORMAL));
- emit_insn (gen_frexpv64sf_exp2_exec
- (target, arg, gcn_gen_undef (V64SImode), exec));
+ emit_insn (gen_frexpv64sf_exp2 (target, arg));
return target;
}
case GCN_BUILTIN_FREXPVF_MANT:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg = force_reg (V64SFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64SFmode,
EXPAND_NORMAL));
- emit_insn (gen_frexpv64sf_mant2_exec
- (target, arg, gcn_gen_undef (V64SFmode), exec));
+ emit_insn (gen_frexpv64sf_mant2 (target, arg));
return target;
}
case GCN_BUILTIN_FREXPV_EXP:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg = force_reg (V64DFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64DFmode,
EXPAND_NORMAL));
- emit_insn (gen_frexpv64df_exp2_exec
- (target, arg, gcn_gen_undef (V64SImode), exec));
+ emit_insn (gen_frexpv64df_exp2 (target, arg));
return target;
}
case GCN_BUILTIN_FREXPV_MANT:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg = force_reg (V64DFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64DFmode,
EXPAND_NORMAL));
- emit_insn (gen_frexpv64df_mant2_exec
- (target, arg, gcn_gen_undef (V64DFmode), exec));
+ emit_insn (gen_frexpv64df_mant2 (target, arg));
return target;
}
case GCN_BUILTIN_OMP_DIM_SIZE:
@@ -4239,10 +4552,11 @@ gcn_vectorize_get_mask_mode (machine_mode)
Helper function for gcn_vectorize_vec_perm_const. */
static rtx
-gcn_make_vec_perm_address (unsigned int *perm)
+gcn_make_vec_perm_address (unsigned int *perm, int nelt)
{
- rtx x = gen_reg_rtx (V64SImode);
- emit_move_insn (x, gcn_vec_constant (V64SImode, 0));
+ machine_mode mode = VnMODE (nelt, SImode);
+ rtx x = gen_reg_rtx (mode);
+ emit_move_insn (x, gcn_vec_constant (mode, 0));
/* Permutation addresses use byte addressing. With each vector lane being
4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant,
@@ -4258,15 +4572,13 @@ gcn_make_vec_perm_address (unsigned int *perm)
{
uint64_t exec_mask = 0;
uint64_t lane_mask = 1;
- for (int j = 0; j < 64; j++, lane_mask <<= 1)
- if ((perm[j] * 4) & bit_mask)
+ for (int j = 0; j < nelt; j++, lane_mask <<= 1)
+ if (((perm[j] % nelt) * 4) & bit_mask)
exec_mask |= lane_mask;
if (exec_mask)
- emit_insn (gen_addv64si3_exec (x, x,
- gcn_vec_constant (V64SImode,
- bit_mask),
- x, get_exec (exec_mask)));
+ emit_insn (gen_addvNsi3 (x, x, gcn_vec_constant (mode, bit_mask),
+ x, get_exec (exec_mask)));
}
return x;
@@ -4336,39 +4648,11 @@ gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
src1_lanes |= lane_bit;
}
- rtx addr = gcn_make_vec_perm_address (perm);
- rtx (*ds_bpermute) (rtx, rtx, rtx, rtx);
-
- switch (vmode)
- {
- case E_V64QImode:
- ds_bpermute = gen_ds_bpermutev64qi;
- break;
- case E_V64HImode:
- ds_bpermute = gen_ds_bpermutev64hi;
- break;
- case E_V64SImode:
- ds_bpermute = gen_ds_bpermutev64si;
- break;
- case E_V64HFmode:
- ds_bpermute = gen_ds_bpermutev64hf;
- break;
- case E_V64SFmode:
- ds_bpermute = gen_ds_bpermutev64sf;
- break;
- case E_V64DImode:
- ds_bpermute = gen_ds_bpermutev64di;
- break;
- case E_V64DFmode:
- ds_bpermute = gen_ds_bpermutev64df;
- break;
- default:
- gcc_assert (false);
- }
+ rtx addr = gcn_make_vec_perm_address (perm, nelt);
/* Load elements from src0 to dst. */
- gcc_assert (~src1_lanes);
- emit_insn (ds_bpermute (dst, addr, src0, gcn_full_exec_reg ()));
+ gcc_assert ((~src1_lanes) & (0xffffffffffffffffUL >> (64-nelt)));
+ emit_insn (gen_ds_bpermutevNm (dst, addr, src0, get_exec (vmode)));
/* Load elements from src1 to dst. */
if (src1_lanes)
@@ -4379,8 +4663,8 @@ gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
the two source vectors together.
*/
rtx tmp = gen_reg_rtx (vmode);
- emit_insn (ds_bpermute (tmp, addr, src1, gcn_full_exec_reg ()));
- emit_insn (gen_mov_with_exec (dst, tmp, get_exec (src1_lanes)));
+ emit_insn (gen_ds_bpermutevNm (tmp, addr, src1, get_exec (vmode)));
+ emit_insn (gen_movvNm (dst, tmp, dst, get_exec (src1_lanes)));
}
return true;
@@ -4396,7 +4680,22 @@ gcn_vector_mode_supported_p (machine_mode mode)
{
return (mode == V64QImode || mode == V64HImode
|| mode == V64SImode || mode == V64DImode
- || mode == V64SFmode || mode == V64DFmode);
+ || mode == V64SFmode || mode == V64DFmode
+ || mode == V32QImode || mode == V32HImode
+ || mode == V32SImode || mode == V32DImode
+ || mode == V32SFmode || mode == V32DFmode
+ || mode == V16QImode || mode == V16HImode
+ || mode == V16SImode || mode == V16DImode
+ || mode == V16SFmode || mode == V16DFmode
+ || mode == V8QImode || mode == V8HImode
+ || mode == V8SImode || mode == V8DImode
+ || mode == V8SFmode || mode == V8DFmode
+ || mode == V4QImode || mode == V4HImode
+ || mode == V4SImode || mode == V4DImode
+ || mode == V4SFmode || mode == V4DFmode
+ || mode == V2QImode || mode == V2HImode
+ || mode == V2SImode || mode == V2DImode
+ || mode == V2SFmode || mode == V2DFmode);
}
/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
@@ -4425,23 +4724,74 @@ gcn_vectorize_preferred_simd_mode (scalar_mode mode)
}
}
+/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES.
+
+ Try all the vector modes. */
+
+unsigned int gcn_autovectorize_vector_modes (vector_modes *modes,
+ bool ARG_UNUSED (all))
+{
+ modes->safe_push (V64QImode);
+ modes->safe_push (V64HImode);
+ modes->safe_push (V64SImode);
+ modes->safe_push (V64SFmode);
+ modes->safe_push (V64DImode);
+ modes->safe_push (V64DFmode);
+
+ modes->safe_push (V32QImode);
+ modes->safe_push (V32HImode);
+ modes->safe_push (V32SImode);
+ modes->safe_push (V32SFmode);
+ modes->safe_push (V32DImode);
+ modes->safe_push (V32DFmode);
+
+ modes->safe_push (V16QImode);
+ modes->safe_push (V16HImode);
+ modes->safe_push (V16SImode);
+ modes->safe_push (V16SFmode);
+ modes->safe_push (V16DImode);
+ modes->safe_push (V16DFmode);
+
+ modes->safe_push (V8QImode);
+ modes->safe_push (V8HImode);
+ modes->safe_push (V8SImode);
+ modes->safe_push (V8SFmode);
+ modes->safe_push (V8DImode);
+ modes->safe_push (V8DFmode);
+
+ modes->safe_push (V4QImode);
+ modes->safe_push (V4HImode);
+ modes->safe_push (V4SImode);
+ modes->safe_push (V4SFmode);
+ modes->safe_push (V4DImode);
+ modes->safe_push (V4DFmode);
+
+ modes->safe_push (V2QImode);
+ modes->safe_push (V2HImode);
+ modes->safe_push (V2SImode);
+ modes->safe_push (V2SFmode);
+ modes->safe_push (V2DImode);
+ modes->safe_push (V2DFmode);
+
+ /* We shouldn't need VECT_COMPARE_COSTS as they should all cost the same. */
+ return 0;
+}
+
/* Implement TARGET_VECTORIZE_RELATED_MODE.
All GCN vectors are 64-lane, so this is simpler than other architectures.
In particular, we do *not* want to match vector bit-size. */
static opt_machine_mode
-gcn_related_vector_mode (machine_mode ARG_UNUSED (vector_mode),
+gcn_related_vector_mode (machine_mode vector_mode,
scalar_mode element_mode, poly_uint64 nunits)
{
- if (known_ne (nunits, 0U) && known_ne (nunits, 64U))
- return VOIDmode;
+ int n = nunits.to_constant ();
- machine_mode pref_mode = gcn_vectorize_preferred_simd_mode (element_mode);
- if (!VECTOR_MODE_P (pref_mode))
- return VOIDmode;
+ if (n == 0)
+ n = GET_MODE_NUNITS (vector_mode);
- return pref_mode;
+ return VnMODE (n, element_mode);
}
/* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT.
@@ -4566,6 +4916,8 @@ gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn,
The vector register SRC of mode MODE is reduced using the operation given
by UNSPEC, and the scalar result is returned in lane 63 of a vector
register. */
+/* FIXME: Implement reductions for sizes other than V64.
+ (They're currently disabled in the machine description.) */
rtx
gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
@@ -4975,10 +5327,11 @@ gcn_md_reorg (void)
{
if (VECTOR_MODE_P (GET_MODE (x)))
{
- new_exec = -1;
- break;
+ int vf = GET_MODE_NUNITS (GET_MODE (x));
+ new_exec = MAX ((uint64_t)new_exec,
+ 0xffffffffffffffffUL >> (64-vf));
}
- else
+ else if (new_exec == 0)
new_exec = 1;
}
}
@@ -5693,13 +6046,12 @@ static void
print_reg (FILE *file, rtx x)
{
machine_mode mode = GET_MODE (x);
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
if (mode == BImode || mode == QImode || mode == HImode || mode == SImode
- || mode == HFmode || mode == SFmode
- || mode == V64SFmode || mode == V64SImode
- || mode == V64QImode || mode == V64HImode)
+ || mode == HFmode || mode == SFmode)
fprintf (file, "%s", reg_names[REGNO (x)]);
- else if (mode == DImode || mode == V64DImode
- || mode == DFmode || mode == V64DFmode)
+ else if (mode == DImode || mode == DFmode)
{
if (SGPR_REGNO_P (REGNO (x)))
fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
@@ -6146,20 +6498,20 @@ print_operand (FILE *file, rtx x, int code)
case 'o':
{
const char *s = 0;
- switch (GET_MODE_SIZE (GET_MODE (x)))
+ machine_mode mode = GET_MODE (x);
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+
+ switch (mode)
{
- case 1:
+ case E_QImode:
s = "_ubyte";
break;
- case 2:
+ case E_HImode:
+ case E_HFmode:
s = "_ushort";
break;
- /* The following are full-vector variants. */
- case 64:
- s = "_ubyte";
- break;
- case 128:
- s = "_ushort";
+ default:
break;
}
@@ -6174,43 +6526,31 @@ print_operand (FILE *file, rtx x, int code)
}
case 's':
{
- const char *s = "";
- switch (GET_MODE_SIZE (GET_MODE (x)))
+ const char *s;
+ machine_mode mode = GET_MODE (x);
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+
+ switch (mode)
{
- case 1:
+ case E_QImode:
s = "_byte";
break;
- case 2:
+ case E_HImode:
+ case E_HFmode:
s = "_short";
break;
- case 4:
+ case E_SImode:
+ case E_SFmode:
s = "_dword";
break;
- case 8:
+ case E_DImode:
+ case E_DFmode:
s = "_dwordx2";
break;
- case 12:
- s = "_dwordx3";
- break;
- case 16:
+ case E_TImode:
s = "_dwordx4";
break;
- case 32:
- s = "_dwordx8";
- break;
- case 64:
- s = VECTOR_MODE_P (GET_MODE (x)) ? "_byte" : "_dwordx16";
- break;
- /* The following are full-vector variants. */
- case 128:
- s = "_short";
- break;
- case 256:
- s = "_dword";
- break;
- case 512:
- s = "_dwordx2";
- break;
default:
output_operand_lossage ("invalid operand %%xn code");
return;
@@ -6714,6 +7054,9 @@ gcn_dwarf_register_span (rtx rtl)
#define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE gcn_attribute_table
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
+ gcn_autovectorize_vector_modes
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL gcn_builtin_decl
#undef TARGET_CAN_CHANGE_MODE_CLASS
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index 318256c..38f7212 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -678,3 +678,27 @@ enum gcn_builtin_codes
/* Trampolines */
#define TRAMPOLINE_SIZE 36
#define TRAMPOLINE_ALIGNMENT 64
+
+/* MD Optimization.
+ The following are intended to be obviously constant at compile time to
+ allow genconditions to eliminate bad patterns at compile time. */
+#define MODE_VF(M) \
+ ((M == V64QImode || M == V64HImode || M == V64HFmode || M == V64SImode \
+ || M == V64SFmode || M == V64DImode || M == V64DFmode) \
+ ? 64 \
+ : (M == V32QImode || M == V32HImode || M == V32HFmode || M == V32SImode \
+ || M == V32SFmode || M == V32DImode || M == V32DFmode) \
+ ? 32 \
+ : (M == V16QImode || M == V16HImode || M == V16HFmode || M == V16SImode \
+ || M == V16SFmode || M == V16DImode || M == V16DFmode) \
+ ? 16 \
+ : (M == V8QImode || M == V8HImode || M == V8HFmode || M == V8SImode \
+ || M == V8SFmode || M == V8DImode || M == V8DFmode) \
+ ? 8 \
+ : (M == V4QImode || M == V4HImode || M == V4HFmode || M == V4SImode \
+ || M == V4SFmode || M == V4DImode || M == V4DFmode) \
+ ? 4 \
+ : (M == V2QImode || M == V2HImode || M == V2HFmode || M == V2SImode \
+ || M == V2SFmode || M == V2DImode || M == V2DFmode) \
+ ? 2 \
+ : 1)
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index 3c702fd..ef56704 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -589,15 +589,12 @@ const char *host_detect_local_cpu (int argc, const char **argv)
/* This is unknown family 0x6 CPU. */
if (has_feature (FEATURE_AVX))
{
+ /* Assume Tiger Lake */
if (has_feature (FEATURE_AVX512VP2INTERSECT))
- {
- if (has_feature (FEATURE_TSXLDTRK))
- /* Assume Sapphire Rapids. */
- cpu = "sapphirerapids";
- else
- /* Assume Tiger Lake */
- cpu = "tigerlake";
- }
+ cpu = "tigerlake";
+ /* Assume Sapphire Rapids. */
+ else if (has_feature (FEATURE_TSXLDTRK))
+ cpu = "sapphirerapids";
/* Assume Cooper Lake */
else if (has_feature (FEATURE_AVX512BF16))
cpu = "cooperlake";
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 900a3bc..372a2cf 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2326,10 +2326,9 @@ constexpr wide_int_bitmask PTA_ICELAKE_SERVER = PTA_ICELAKE_CLIENT
constexpr wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI
| PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT | PTA_KL | PTA_WIDEKL;
constexpr wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_ICELAKE_SERVER | PTA_MOVDIRI
- | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE
- | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE
- | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI | PTA_AVX512FP16
- | PTA_AVX512BF16;
+ | PTA_MOVDIR64B | PTA_ENQCMD | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG
+ | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16
+ | PTA_UINTR | PTA_AVXVNNI | PTA_AVX512FP16 | PTA_AVX512BF16;
constexpr wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF
| PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1;
constexpr wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1be9b66..8e84752 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -10826,6 +10826,39 @@
(set_attr "type" "alu, alu, msklog")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "*notxor<mode>_1"
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
+ (not:SWI248
+ (xor:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
+ (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 0)
+ (xor:SWI248 (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0)
+ (not:SWI248 (match_dup 0)))]
+{
+ if (MASK_REG_P (operands[0]))
+ {
+ emit_insn (gen_kxnor<mode> (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}
+ [(set (attr "isa")
+ (cond [(eq_attr "alternative" "2")
+ (if_then_else (eq_attr "mode" "SI,DI")
+ (const_string "avx512bw")
+ (const_string "avx512f"))
+ ]
+ (const_string "*")))
+ (set_attr "type" "alu, alu, msklog")
+ (set_attr "mode" "<MODE>")])
+
(define_insn_and_split "*iordi_1_bts"
[(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
(ior:DI
@@ -10959,6 +10992,44 @@
(symbol_ref "!TARGET_PARTIAL_REG_STALL")]
(symbol_ref "true")))])
+(define_insn_and_split "*notxorqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
+ (not:QI
+ (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
+ (match_operand:QI 2 "general_operand" "qn,m,rn,k"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (XOR, QImode, operands)"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 0)
+ (xor:QI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0)
+ (not:QI (match_dup 0)))]
+{
+ if (mask_reg_operand (operands[0], QImode))
+ {
+ emit_insn (gen_kxnorqi (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}
+ [(set_attr "isa" "*,*,*,avx512f")
+ (set_attr "type" "alu,alu,alu,msklog")
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "SI")
+ (and (eq_attr "alternative" "3")
+ (match_test "!TARGET_AVX512DQ"))
+ (const_string "HI")
+ ]
+ (const_string "QI")))
+ ;; Potential partial reg stall on alternative 2.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "2")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))])
+
;; Alternative 1 is needed to work around LRA limitation, see PR82524.
(define_insn_and_split "*<code><mode>_1_slp"
[(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
@@ -20145,8 +20216,8 @@
(set_attr "mode" "<MODE>")])
(define_expand "lrint<MODEF:mode><SWI48:mode>2"
- [(set (match_operand:SWI48 0 "nonimmediate_operand")
- (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")]
+ [(set (match_operand:SWI48 0 "register_operand")
+ (unspec:SWI48 [(match_operand:MODEF 1 "nonimmediate_operand")]
UNSPEC_FIX_NOTRUNC))]
"SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH")
diff --git a/gcc/config/mips/driver-native.cc b/gcc/config/mips/driver-native.cc
index 47627f8..327ad25 100644
--- a/gcc/config/mips/driver-native.cc
+++ b/gcc/config/mips/driver-native.cc
@@ -23,6 +23,9 @@ along with GCC; see the file COPYING3. If not see
#include "system.h"
#include "coretypes.h"
#include "tm.h"
+#ifdef HAVE_SYS_AUXV_H
+#include <sys/auxv.h>
+#endif
/* This will be called by the spec parser in gcc.cc when it sees
a %:local_cpu_detect(args) construct. Currently it will be called
@@ -41,6 +44,7 @@ const char *
host_detect_local_cpu (int argc, const char **argv)
{
const char *cpu = NULL;
+ char *ret = NULL;
char buf[128];
FILE *f;
bool arch;
@@ -54,7 +58,7 @@ host_detect_local_cpu (int argc, const char **argv)
f = fopen ("/proc/cpuinfo", "r");
if (f == NULL)
- return NULL;
+ goto fallback_cpu;
while (fgets (buf, sizeof (buf), f) != NULL)
if (startswith (buf, "cpu model"))
@@ -84,8 +88,23 @@ host_detect_local_cpu (int argc, const char **argv)
fclose (f);
+fallback_cpu:
+#if defined (__mips_nan2008)
+ ret = reconcat (ret, " -mnan=2008 ", NULL);
+#endif
+
+#ifdef HAVE_GETAUXVAL
if (cpu == NULL)
- return NULL;
+ cpu = (const char *) getauxval (AT_BASE_PLATFORM);
+#endif
+
+#if defined (_MIPS_ARCH)
+ if (cpu == NULL)
+ cpu = _MIPS_ARCH;
+#endif
+
+ if (cpu)
+ ret = reconcat (ret, ret, "-m", argv[0], "=", cpu, NULL);
- return concat ("-m", argv[0], "=", cpu, NULL);
+ return ret;
}
diff --git a/gcc/config/pru/pru-protos.h b/gcc/config/pru/pru-protos.h
index 4b190c9..517fa02 100644
--- a/gcc/config/pru/pru-protos.h
+++ b/gcc/config/pru/pru-protos.h
@@ -52,6 +52,7 @@ extern const char *pru_output_signed_cbranch (rtx *, bool);
extern const char *pru_output_signed_cbranch_ubyteop2 (rtx *, bool);
extern const char *pru_output_signed_cbranch_zeroop2 (rtx *, bool);
+extern enum rtx_code pru_noteq_condition (enum rtx_code code);
extern rtx pru_expand_fp_compare (rtx comparison, machine_mode mode);
extern void pru_emit_doloop (rtx *, int);
diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc
index 04eca90..0029dcb 100644
--- a/gcc/config/pru/pru.cc
+++ b/gcc/config/pru/pru.cc
@@ -895,6 +895,27 @@ pru_init_libfuncs (void)
set_optab_libfunc (udivmod_optab, DImode, "__pruabi_divremull");
}
+/* Given a comparison CODE, return a similar comparison but without
+ the "equals" condition. In other words, it strips GE/GEU/LE/LEU
+ and instead returns GT/GTU/LT/LTU. */
+
+enum rtx_code
+pru_noteq_condition (enum rtx_code code)
+{
+ switch (code)
+ {
+ case GT: return GT;
+ case GTU: return GTU;
+ case GE: return GT;
+ case GEU: return GTU;
+ case LT: return LT;
+ case LTU: return LTU;
+ case LE: return LT;
+ case LEU: return LTU;
+ default:
+ gcc_unreachable ();
+ }
+}
/* Emit comparison instruction if necessary, returning the expression
that holds the compare result in the proper mode. Return the comparison
diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md
index 144cd35..bdc5ad7 100644
--- a/gcc/config/pru/pru.md
+++ b/gcc/config/pru/pru.md
@@ -703,6 +703,202 @@
[(set_attr "type" "alu")
(set_attr "length" "12")])
+
+; 64-bit LSHIFTRT with a constant shift count can be expanded into
+; more efficient code sequence than a variable register shift.
+;
+; 1. For shift >= 32:
+; dst_lo = (src_hi >> (shift - 32))
+; dst_hi = 0
+;
+; 2. For shift==1 there is no need for a temporary:
+; dst_lo = (src_lo >> 1)
+; if (src_hi & 1)
+; dst_lo |= (1 << 31)
+; dst_hi = (src_hi >> 1)
+;
+; 3. For shift < 32:
+; dst_lo = (src_lo >> shift)
+; tmp = (src_hi << (32 - shift)
+; dst_lo |= tmp
+; dst_hi = (src_hi >> shift)
+;
+; 4. For shift in a register:
+; Fall back to calling libgcc.
+(define_expand "lshrdi3"
+ [(set (match_operand:DI 0 "register_operand")
+ (lshiftrt:DI
+ (match_operand:DI 1 "register_operand")
+ (match_operand:QI 2 "const_int_operand")))]
+ ""
+{
+ gcc_assert (CONST_INT_P (operands[2]));
+
+ const int nshifts = INTVAL (operands[2]);
+ rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+ rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+ rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+ rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+
+ if (nshifts >= 32)
+ {
+ emit_insn (gen_rtx_SET (dst_lo,
+ gen_rtx_LSHIFTRT (SImode,
+ src_hi,
+ GEN_INT (nshifts - 32))));
+ emit_insn (gen_rtx_SET (dst_hi, const0_rtx));
+ DONE;
+ }
+
+ gcc_assert (can_create_pseudo_p ());
+
+ /* The expansions which follow are safe only if DST_LO and SRC_HI
+ do not overlap. If they do, then fix by using a temporary register.
+ Overlapping of DST_HI and SRC_LO is safe because by the time DST_HI
+ is set, SRC_LO is no longer live. */
+ if (reg_overlap_mentioned_p (dst_lo, src_hi))
+ {
+ rtx new_src_hi = gen_reg_rtx (SImode);
+
+ emit_move_insn (new_src_hi, src_hi);
+ src_hi = new_src_hi;
+ }
+
+ if (nshifts == 1)
+ {
+ rtx_code_label *skip_hiset_label;
+ rtx j;
+
+ emit_insn (gen_rtx_SET (dst_lo,
+ gen_rtx_LSHIFTRT (SImode, src_lo, const1_rtx)));
+
+ /* The code generated by `genemit' would create a LABEL_REF. */
+ skip_hiset_label = gen_label_rtx ();
+ j = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
+ SImode,
+ src_hi,
+ GEN_INT (0),
+ skip_hiset_label));
+ JUMP_LABEL (j) = skip_hiset_label;
+ LABEL_NUSES (skip_hiset_label)++;
+
+ emit_insn (gen_iorsi3 (dst_lo, dst_lo, GEN_INT (1 << 31)));
+ emit_label (skip_hiset_label);
+ emit_insn (gen_rtx_SET (dst_hi,
+ gen_rtx_LSHIFTRT (SImode, src_hi, const1_rtx)));
+ DONE;
+ }
+
+ if (nshifts < 32)
+ {
+ rtx tmpval = gen_reg_rtx (SImode);
+
+ emit_insn (gen_rtx_SET (dst_lo,
+ gen_rtx_LSHIFTRT (SImode,
+ src_lo,
+ GEN_INT (nshifts))));
+ emit_insn (gen_rtx_SET (tmpval,
+ gen_rtx_ASHIFT (SImode,
+ src_hi,
+ GEN_INT (32 - nshifts))));
+ emit_insn (gen_iorsi3 (dst_lo, dst_lo, tmpval));
+ emit_insn (gen_rtx_SET (dst_hi,
+ gen_rtx_LSHIFTRT (SImode,
+ src_hi,
+ GEN_INT (nshifts))));
+ DONE;
+ }
+ gcc_unreachable ();
+})
+
+; 64-bit ASHIFT with a constant shift count can be expanded into
+; more efficient code sequence than the libgcc call required by
+; a variable shift in a register.
+
+(define_expand "ashldi3"
+ [(set (match_operand:DI 0 "register_operand")
+ (ashift:DI
+ (match_operand:DI 1 "register_operand")
+ (match_operand:QI 2 "const_int_operand")))]
+ ""
+{
+ gcc_assert (CONST_INT_P (operands[2]));
+
+ const int nshifts = INTVAL (operands[2]);
+ rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+ rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+ rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+ rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+
+ if (nshifts >= 32)
+ {
+ emit_insn (gen_rtx_SET (dst_hi,
+ gen_rtx_ASHIFT (SImode,
+ src_lo,
+ GEN_INT (nshifts - 32))));
+ emit_insn (gen_rtx_SET (dst_lo, const0_rtx));
+ DONE;
+ }
+
+ gcc_assert (can_create_pseudo_p ());
+
+ /* The expansions which follow are safe only if DST_HI and SRC_LO
+ do not overlap. If they do, then fix by using a temporary register.
+ Overlapping of DST_LO and SRC_HI is safe because by the time DST_LO
+ is set, SRC_HI is no longer live. */
+ if (reg_overlap_mentioned_p (dst_hi, src_lo))
+ {
+ rtx new_src_lo = gen_reg_rtx (SImode);
+
+ emit_move_insn (new_src_lo, src_lo);
+ src_lo = new_src_lo;
+ }
+
+ if (nshifts == 1)
+ {
+ rtx_code_label *skip_hiset_label;
+ rtx j;
+
+ emit_insn (gen_rtx_SET (dst_hi,
+ gen_rtx_ASHIFT (SImode, src_hi, const1_rtx)));
+
+ skip_hiset_label = gen_label_rtx ();
+ j = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
+ SImode,
+ src_lo,
+ GEN_INT (31),
+ skip_hiset_label));
+ JUMP_LABEL (j) = skip_hiset_label;
+ LABEL_NUSES (skip_hiset_label)++;
+
+ emit_insn (gen_iorsi3 (dst_hi, dst_hi, GEN_INT (1 << 0)));
+ emit_label (skip_hiset_label);
+ emit_insn (gen_rtx_SET (dst_lo,
+ gen_rtx_ASHIFT (SImode, src_lo, const1_rtx)));
+ DONE;
+ }
+
+ if (nshifts < 32)
+ {
+ rtx tmpval = gen_reg_rtx (SImode);
+
+ emit_insn (gen_rtx_SET (dst_hi,
+ gen_rtx_ASHIFT (SImode,
+ src_hi,
+ GEN_INT (nshifts))));
+ emit_insn (gen_rtx_SET (tmpval,
+ gen_rtx_LSHIFTRT (SImode,
+ src_lo,
+ GEN_INT (32 - nshifts))));
+ emit_insn (gen_iorsi3 (dst_hi, dst_hi, tmpval));
+ emit_insn (gen_rtx_SET (dst_lo,
+ gen_rtx_ASHIFT (SImode,
+ src_lo,
+ GEN_INT (nshifts))));
+ DONE;
+ }
+ gcc_unreachable ();
+})
;; Include ALU patterns with zero-extension of operands. That's where
;; the real insns are defined.
@@ -1113,6 +1309,186 @@
operands[2] = XEXP (t, 1);
})
+;; Expand the cbranchdi pattern in order to avoid the default
+;; expansion into word_mode operations, which is not efficient for PRU.
+;; In pseudocode this expansion outputs:
+;;
+;; /* EQ */
+;; if (OP1_hi {reverse_condition (cmp)} OP2_hi)
+;; goto fallthrough
+;; if (OP1_lo {cmp} OP2_lo)
+;; goto label3
+;; fallthrough:
+;;
+;; /* NE */
+;; if (OP1_hi {cmp} OP2_hi)
+;; goto label3
+;; if (OP1_lo {cmp} OP2_lo)
+;; goto label3
+;;
+;; The LT comparisons with zero take one machine instruction to simply
+;; check the sign bit. The GT comparisons with zero take two - one
+;; to check the sign bit, and one to check for zero. Hence arrange
+;; the expand such that only LT comparison is used for OP1_HI, because
+;; OP2_HI is const0_rtx.
+;;
+;; The LTU comparisons with zero will be removed by subsequent passes.
+;;
+;; /* LT/LTU/LE/LEU */
+;; if (OP1_hi {noteq_condition (cmp)} OP2_hi)
+;; goto label3 /* DI comparison obviously true. */
+;; if (OP1_hi != OP2_hi)
+;; goto fallthrough /* DI comparison obviously not true. */
+;; if (OP1_lo {unsigned_condition (cmp)} OP2_lo)
+;; goto label3 /* Comparison was deferred to lo parts. */
+;; fallthrough:
+
+;; /* GT/GTU/GE/GEU */
+;; if (OP1_hi {reverse_condition (noteq_condition (cmp))} OP2_hi)
+;; goto fallthrough /* DI comparison obviously not true. */
+;; if (OP1_hi != OP2_hi)
+;; goto label3 /* DI comparison obviously true. */
+;; if (OP1_lo {unsigned_condition (cmp)} OP2_lo)
+;; goto label3 /* Comparison was deferred to lo parts. */
+;; fallthrough:
+
+(define_expand "cbranchdi4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:DI 1 "register_operand")
+ (match_operand:DI 2 "reg_or_ubyte_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
+{
+ const enum rtx_code code = GET_CODE (operands[0]);
+ rtx label3 = operands[3];
+ rtx op1_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+ rtx op1_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+ rtx op2_lo = simplify_gen_subreg (SImode, operands[2], DImode, 0);
+ rtx op2_hi = simplify_gen_subreg (SImode, operands[2], DImode, 4);
+ rtx j;
+
+ if (code == EQ)
+ {
+ rtx label_fallthrough = gen_label_rtx ();
+ rtx label_fallthrough_ref = gen_rtx_LABEL_REF (Pmode, label_fallthrough);
+
+ rtx cond_hi = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi);
+ rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi,
+ label_fallthrough_ref, pc_rtx);
+ j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi));
+ JUMP_LABEL (j) = label_fallthrough;
+ LABEL_NUSES (label_fallthrough)++;
+
+ rtx label3_ref = gen_rtx_LABEL_REF (Pmode, label3);
+ rtx cond_lo = gen_rtx_fmt_ee (EQ, VOIDmode, op1_lo, op2_lo);
+ rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo,
+ label3_ref, pc_rtx);
+ j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo));
+ JUMP_LABEL (j) = label3;
+ LABEL_NUSES (label3)++;
+
+ emit_label (label_fallthrough);
+ DONE;
+ }
+ if (code == NE)
+ {
+ rtx label3_ref1 = gen_rtx_LABEL_REF (Pmode, label3);
+ rtx cond_hi = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi);
+ rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi,
+ label3_ref1, pc_rtx);
+ j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi));
+ JUMP_LABEL (j) = label3;
+ LABEL_NUSES (label3)++;
+
+ rtx label3_ref2 = gen_rtx_LABEL_REF (Pmode, label3);
+ rtx cond_lo = gen_rtx_fmt_ee (NE, VOIDmode, op1_lo, op2_lo);
+ rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo,
+ label3_ref2, pc_rtx);
+ j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo));
+ JUMP_LABEL (j) = label3;
+ LABEL_NUSES (label3)++;
+
+ DONE;
+ }
+
+ if (code == LT || code == LTU || code == LE || code == LEU)
+ {
+ /* Check for "DI comparison obviously true". */
+ rtx label3_ref1 = gen_rtx_LABEL_REF (Pmode, label3);
+ rtx cond_hi = gen_rtx_fmt_ee (pru_noteq_condition (code),
+ VOIDmode, op1_hi, op2_hi);
+ rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi,
+ label3_ref1, pc_rtx);
+ j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi));
+ JUMP_LABEL (j) = label3;
+ LABEL_NUSES (label3)++;
+
+ /* Check for "DI comparison obviously not true". */
+ rtx label_fallthrough = gen_label_rtx ();
+ rtx label_fallthrough_ref = gen_rtx_LABEL_REF (Pmode, label_fallthrough);
+ rtx cond_hine = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi);
+ rtx check_hine = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hine,
+ label_fallthrough_ref, pc_rtx);
+ j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hine));
+ JUMP_LABEL (j) = label_fallthrough;
+ LABEL_NUSES (label_fallthrough)++;
+
+ /* Comparison deferred to the lo parts. */
+ rtx label3_ref2 = gen_rtx_LABEL_REF (Pmode, label3);
+ rtx cond_lo = gen_rtx_fmt_ee (unsigned_condition (code),
+ VOIDmode, op1_lo, op2_lo);
+ rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo,
+ label3_ref2, pc_rtx);
+ j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo));
+ JUMP_LABEL (j) = label3;
+ LABEL_NUSES (label3)++;
+
+ emit_label (label_fallthrough);
+ DONE;
+ }
+
+ if (code == GT || code == GTU || code == GE || code == GEU)
+ {
+ /* Check for "DI comparison obviously not true". */
+ const enum rtx_code reversed_code = reverse_condition (code);
+ rtx label_fallthrough = gen_label_rtx ();
+ rtx label_fallthrough_ref = gen_rtx_LABEL_REF (Pmode, label_fallthrough);
+ rtx cond_hi = gen_rtx_fmt_ee (pru_noteq_condition (reversed_code),
+ VOIDmode, op1_hi, op2_hi);
+ rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi,
+ label_fallthrough_ref, pc_rtx);
+ j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi));
+ JUMP_LABEL (j) = label_fallthrough;
+ LABEL_NUSES (label_fallthrough)++;
+
+ /* Check for "DI comparison obviously true". */
+ rtx label3_ref1 = gen_rtx_LABEL_REF (Pmode, label3);
+ rtx cond_hine = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi);
+ rtx check_hine = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hine,
+ label3_ref1, pc_rtx);
+ j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hine));
+ JUMP_LABEL (j) = label3;
+ LABEL_NUSES (label3)++;
+
+ /* Comparison deferred to the lo parts. */
+ rtx label3_ref2 = gen_rtx_LABEL_REF (Pmode, label3);
+ rtx cond_lo = gen_rtx_fmt_ee (unsigned_condition (code),
+ VOIDmode, op1_lo, op2_lo);
+ rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo,
+ label3_ref2, pc_rtx);
+ j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo));
+ JUMP_LABEL (j) = label3;
+ LABEL_NUSES (label3)++;
+
+ emit_label (label_fallthrough);
+ DONE;
+ }
+ gcc_unreachable ();
+})
+
;
; Bit test branch
diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index cac0043..78f6eac 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -190,4 +190,4 @@ void
riscv_register_pragmas (void)
{
c_register_pragma ("riscv", "intrinsic", riscv_pragma_intrinsic);
-} \ No newline at end of file
+}
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc
index 0096e32..99c4825 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -50,25 +50,56 @@ using namespace riscv_vector;
namespace riscv_vector {
+/* Static information about each vector type. */
+struct vector_type_info
+{
+ /* The name of the type as declared by riscv_vector.h
+ which is recommend to use. For example: 'vint32m1_t'. */
+ const char *name;
+
+ /* ABI name of vector type. The type is always available
+ under this name, even when riscv_vector.h isn't included.
+ For example: '__rvv_int32m1_t'. */
+ const char *abi_name;
+
+ /* The C++ mangling of ABI_NAME. */
+ const char *mangled_name;
+};
+
/* Information about each RVV type. */
static CONSTEXPR const vector_type_info vector_types[] = {
-#define DEF_RVV_TYPE(USER_NAME, NCHARS, ABI_NAME, ARGS...) \
- {#USER_NAME, #ABI_NAME, "u" #NCHARS #ABI_NAME},
+#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, ARGS...) \
+ {#NAME, #ABI_NAME, "u" #NCHARS #ABI_NAME},
#include "riscv-vector-builtins.def"
};
-/* The scalar type associated with each vector type. */
-static GTY (()) tree scalar_types[NUM_VECTOR_TYPES];
-/* The machine mode associated with each vector type. */
-static GTY (()) machine_mode vector_modes[NUM_VECTOR_TYPES];
/* The RVV types, with their built-in
"__rvv..._t" name. Allow an index of NUM_VECTOR_TYPES, which always
yields a null tree. */
-static GTY(()) tree abi_vector_types[NUM_VECTOR_TYPES + 1];
+static GTY (()) tree abi_vector_types[NUM_VECTOR_TYPES + 1];
/* Same, but with the riscv_vector.h "v..._t" name. */
-extern GTY(()) tree builtin_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
-tree builtin_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
+extern GTY (()) rvv_builtin_types_t builtin_types[NUM_VECTOR_TYPES + 1];
+rvv_builtin_types_t builtin_types[NUM_VECTOR_TYPES + 1];
+
+/* RAII class for enabling enough RVV features to define the built-in
+ types and implement the riscv_vector.h pragma.
+
+ Note: According to 'TYPE_MODE' macro implementation, we need to set
+ have_regs_of_mode[mode] to be true if we want to get the exact mode
+ from 'TYPE_MODE'. However, have_regs_of_mode has not been set yet in
+ targetm.init_builtins (). We need rvv_switcher to set have_regs_of_mode
+ before targetm.init_builtins () and recover back have_regs_of_mode
+ after targetm.init_builtins (). */
+class rvv_switcher
+{
+public:
+ rvv_switcher ();
+ ~rvv_switcher ();
+
+private:
+ bool m_old_have_regs_of_mode[MAX_MACHINE_MODE];
+};
rvv_switcher::rvv_switcher ()
{
@@ -93,8 +124,8 @@ add_vector_type_attribute (tree type, const char *mangled_name)
{
tree mangled_name_tree = get_identifier (mangled_name);
tree value = tree_cons (NULL_TREE, mangled_name_tree, NULL_TREE);
- TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("RVV type"), value,
- TYPE_ATTRIBUTES (type));
+ TYPE_ATTRIBUTES (type)
+ = tree_cons (get_identifier ("RVV type"), value, TYPE_ATTRIBUTES (type));
}
/* Force TYPE to be a sizeless type. */
@@ -137,6 +168,39 @@ mangle_builtin_type (const_tree type)
return NULL;
}
+/* Return a representation of "const T *". */
+static tree
+build_const_pointer (tree t)
+{
+ return build_pointer_type (build_qualified_type (t, TYPE_QUAL_CONST));
+}
+
+/* Helper function for register a single built-in RVV ABI type. */
+static void
+register_builtin_type (vector_type_index type, tree eltype, machine_mode mode)
+{
+ builtin_types[type].scalar = eltype;
+ builtin_types[type].scalar_ptr = build_pointer_type (eltype);
+ builtin_types[type].scalar_const_ptr = build_const_pointer (eltype);
+ if (!riscv_v_ext_enabled_vector_mode_p (mode))
+ return;
+
+ tree vectype = build_vector_type_for_mode (eltype, mode);
+ gcc_assert (VECTOR_MODE_P (TYPE_MODE (vectype)) && TYPE_MODE (vectype) == mode
+ && TYPE_MODE_RAW (vectype) == mode && TYPE_ALIGN (vectype) <= 128
+ && known_eq (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
+ GET_MODE_BITSIZE (mode)));
+ vectype = build_distinct_type_copy (vectype);
+ gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype));
+ SET_TYPE_STRUCTURAL_EQUALITY (vectype);
+ TYPE_ARTIFICIAL (vectype) = 1;
+ TYPE_INDIVISIBLE_P (vectype) = 1;
+ add_vector_type_attribute (vectype, vector_types[type].mangled_name);
+ make_type_sizeless (vectype);
+ abi_vector_types[type] = vectype;
+ lang_hooks.types.register_builtin_type (vectype, vector_types[type].abi_name);
+}
+
/* Register the built-in RVV ABI types, such as __rvv_int32m1_t. */
static void
register_builtin_types ()
@@ -151,42 +215,12 @@ register_builtin_types ()
= TARGET_64BIT ? unsigned_intSI_type_node : long_unsigned_type_node;
machine_mode mode;
-#define DEF_RVV_TYPE(USER_NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \
- VECTOR_MODE_MIN_VLEN_32) \
+#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \
+ VECTOR_MODE_MIN_VLEN_32, ARGS...) \
mode = TARGET_MIN_VLEN > 32 ? VECTOR_MODE##mode \
: VECTOR_MODE_MIN_VLEN_32##mode; \
- scalar_types[VECTOR_TYPE_##USER_NAME] \
- = riscv_v_ext_enabled_vector_mode_p (mode) ? SCALAR_TYPE##_type_node \
- : NULL_TREE; \
- vector_modes[VECTOR_TYPE_##USER_NAME] \
- = riscv_v_ext_enabled_vector_mode_p (mode) ? mode : VOIDmode;
+ register_builtin_type (VECTOR_TYPE_##NAME, SCALAR_TYPE##_type_node, mode);
#include "riscv-vector-builtins.def"
-
- for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i)
- {
- tree eltype = scalar_types[i];
- mode = vector_modes[i];
- /* We disabled the datatypes according '-march'. */
- if (!eltype)
- continue;
-
- tree vectype = build_vector_type_for_mode (eltype, mode);
- gcc_assert (
- VECTOR_MODE_P (TYPE_MODE (vectype)) && TYPE_MODE (vectype) == mode
- && TYPE_MODE_RAW (vectype) == mode && TYPE_ALIGN (vectype) <= 128
- && known_eq (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
- GET_MODE_BITSIZE (mode)));
- vectype = build_distinct_type_copy (vectype);
- gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype));
- SET_TYPE_STRUCTURAL_EQUALITY (vectype);
- TYPE_ARTIFICIAL (vectype) = 1;
- TYPE_INDIVISIBLE_P (vectype) = 1;
- add_vector_type_attribute (vectype, vector_types[i].mangled_name);
- make_type_sizeless (vectype);
- abi_vector_types[i] = vectype;
- lang_hooks.types.register_builtin_type (vectype,
- vector_types[i].abi_name);
- }
}
/* Register vector type TYPE under its risv_vector.h name. */
@@ -198,7 +232,7 @@ register_vector_type (vector_type_index type)
is disabled according to '-march'. */
if (!vectype)
return;
- tree id = get_identifier (vector_types[type].user_name);
+ tree id = get_identifier (vector_types[type].name);
tree decl = build_decl (input_location, TYPE_DECL, id, vectype);
decl = lang_hooks.decls.pushdecl (decl);
@@ -212,7 +246,8 @@ register_vector_type (vector_type_index type)
&& TYPE_MAIN_VARIANT (TREE_TYPE (decl)) == vectype)
vectype = TREE_TYPE (decl);
- builtin_vector_types[0][type] = vectype;
+ builtin_types[type].vector = vectype;
+ builtin_types[type].vector_ptr = build_pointer_type (vectype);
}
/* Initialize all compiler built-ins related to RVV that should be
diff --git a/gcc/config/riscv/riscv-vector-builtins.def b/gcc/config/riscv/riscv-vector-builtins.def
index a9001b3..83603fe 100644
--- a/gcc/config/riscv/riscv-vector-builtins.def
+++ b/gcc/config/riscv/riscv-vector-builtins.def
@@ -32,7 +32,7 @@ along with GCC; see the file COPYING3. If not see
TARGET_MIN_VLEN > 32. Otherwise the machine mode is VNx1SImode. */
#ifndef DEF_RVV_TYPE
-#define DEF_RVV_TYPE(USER_NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \
+#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \
VECTOR_MODE_MIN_VLEN_32)
#endif
diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h
index 6ca0b07..ea67da9 100644
--- a/gcc/config/riscv/riscv-vector-builtins.h
+++ b/gcc/config/riscv/riscv-vector-builtins.h
@@ -23,52 +23,23 @@
namespace riscv_vector {
-/* This is for segment instructions. */
-const unsigned int MAX_TUPLE_SIZE = 8;
-
-/* Static information about each vector type. */
-struct vector_type_info
-{
- /* The name of the type as declared by riscv_vector.h
- which is recommend to use. For example: 'vint32m1_t'. */
- const char *user_name;
-
- /* ABI name of vector type. The type is always available
- under this name, even when riscv_vector.h isn't included.
- For example: '__rvv_int32m1_t'. */
- const char *abi_name;
-
- /* The C++ mangling of ABI_NAME. */
- const char *mangled_name;
-};
-
/* Enumerates the RVV types, together called
"vector types" for brevity. */
enum vector_type_index
{
-#define DEF_RVV_TYPE(USER_NAME, ABI_NAME, NCHARS, ARGS...) \
- VECTOR_TYPE_##USER_NAME,
+#define DEF_RVV_TYPE(NAME, ABI_NAME, NCHARS, ARGS...) VECTOR_TYPE_##NAME,
#include "riscv-vector-builtins.def"
NUM_VECTOR_TYPES
};
-/* RAII class for enabling enough RVV features to define the built-in
- types and implement the riscv_vector.h pragma.
-
- Note: According to 'TYPE_MODE' macro implementation, we need set
- have_regs_of_mode[mode] to be true if we want to get the exact mode
- from 'TYPE_MODE'. However, have_regs_of_mode has not been set yet in
- targetm.init_builtins (). We need rvv_switcher to set have_regs_of_mode
- before targetm.init_builtins () and recover back have_regs_of_mode
- after targetm.init_builtins (). */
-class rvv_switcher
+/* Builtin types that are used to register RVV intrinsics. */
+struct GTY (()) rvv_builtin_types_t
{
-public:
- rvv_switcher ();
- ~rvv_switcher ();
-
-private:
- bool m_old_have_regs_of_mode[MAX_MACHINE_MODE];
+ tree vector;
+ tree scalar;
+ tree vector_ptr;
+ tree scalar_ptr;
+ tree scalar_const_ptr;
};
} // end namespace riscv_vector
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 014206f..2d1cda2 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -229,6 +229,7 @@
;; Classification of RVV instructions which will be added to each RVV .md pattern and used by scheduler.
;; rdvlenb vector byte length vlenb csrr read
;; rdvl vector length vl csrr read
+;; vsetvl vector configuration-setting instructions
;; 7. Vector Loads and Stores
;; vlde vector unit-stride load instructions
;; vste vector unit-stride store instructions
@@ -316,7 +317,7 @@
"unknown,branch,jump,call,load,fpload,store,fpstore,
mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul,
fmadd,fdiv,fcmp,fcvt,fsqrt,multi,auipc,sfb_alu,nop,ghost,bitmanip,rotate,
- rdvlenb,rdvl,vlde,vste,vldm,vstm,vlds,vsts,
+ rdvlenb,rdvl,vsetvl,vlde,vste,vldm,vstm,vlds,vsts,
vldux,vldox,vstux,vstox,vldff,vldr,vstr,
vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,
vimul,vidiv,viwmul,vimuladd,viwmuladd,vimerge,vimov,
diff --git a/gcc/config/vxworks.h b/gcc/config/vxworks.h
index 075a451..e7e5ffe 100644
--- a/gcc/config/vxworks.h
+++ b/gcc/config/vxworks.h
@@ -224,14 +224,54 @@ extern void vxworks_driver_init (unsigned int *, struct cl_decoded_option **);
#undef VXWORKS_LINK_SPEC
#define VXWORKS_LINK_SPEC VXWORKS_BASE_LINK_SPEC " " VXWORKS_EXTRA_LINK_SPEC
+/* Control how to include libgcc in the link closure, handling both "shared"
+ and "non-static" in addition to "static-libgcc" when shared lib support is
+ enabled. */
+
#undef VXWORKS_LIBGCC_SPEC
+
+/* libgcc_eh control; libgcc_eh.a is available either together with libgcc_s
+ (mrtp and mcmodel!=large when configured with --enable-shared) or when the
+ compiler is specially setup to support dual sjlj/table-based eh. */
+
+/* VX_LGCC_EH_SO1: The "-lgcc_eh" part we need in situations where we know a
+ shared libgcc is available (ENABLE_SHARED_LIBGCC + mrtp multilib). */
+
+#define VX_LGCC_EH_SO1 " -lgcc_eh -lgcc"
+/* Extra -lgcc to handle functions from libgcc_eh that refer to symbols
+ exposed by libgcc and not guaranteed to be dragged in before -lgcc_eh
+ appears. */
+
+/* VX_LGCC_EH_SO0: The "-lgcc_eh" part we need in situations where we know a
+ shared libgcc is not available (!ENABLE_SHARED_LIBGCC or !mrtp multilib). */
+
+#if !defined(CONFIG_DUAL_EXCEPTIONS)
+
+/* No shared lib && !DUAL_EH -> no libgcc_eh available at all. */
+#define VX_LGCC_EH_SO0
+
+#else /* CONFIG_DUAL_EXCEPTIONS */
+
+/* No shared lib but DUAL_EH -> libgcc_eh around and spec handled by the driver
+ depending on ENABLE_SHARED_LIBGCC. If defined, the driver expects a regular
+ sequence. Otherwise, the driver is expected to turn -lgcc into -lgcc_eh on
+ its own and just add an instance to address possible cross refs. */
+
+#if defined(ENABLE_SHARED_LIBGCC)
+#define VX_LGCC_EH_SO0 " -lgcc_eh -lgcc"
+#else
+#define VX_LGCC_EH_SO0 " -lgcc"
+#endif
+
+#endif /* CONFIG_DUAL_EXCEPTIONS */
+
#if defined(ENABLE_SHARED_LIBGCC)
#define VXWORKS_LIBGCC_SPEC \
-"%{!mrtp:-lgcc -lgcc_eh} \
- %{mrtp:%{!static-libgcc:%{shared|non-static:-lgcc_s;:-lgcc -lgcc_eh}} \
- %{static-libgcc:-lgcc -lgcc_eh}}"
+ "%{!mrtp|mcmodel=large:-lgcc" VX_LGCC_EH_SO0 ";" \
+ " :%{!static-libgcc:%{shared|non-static:-lgcc_s;:-lgcc" VX_LGCC_EH_SO1 "}} \
+ %{static-libgcc:-lgcc" VX_LGCC_EH_SO1 "}}"
#else
-#define VXWORKS_LIBGCC_SPEC "-lgcc"
+#define VXWORKS_LIBGCC_SPEC "-lgcc" VX_LGCC_EH_SO0
#endif
/* Setup the crtstuff begin/end we might need for dwarf EH registration