Diffstat (limited to 'gcc/config/s390/s390.cc')
-rw-r--r--  gcc/config/s390/s390.cc | 342
1 file changed, 312 insertions, 30 deletions
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 0ff3fd5..de9c15c 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -342,7 +342,7 @@ const struct s390_processor processor_table[] =
{ "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 },
{ "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 },
{ "z16", "arch14", PROCESSOR_3931_Z16, &zEC12_cost, 14 },
- { "arch15", "arch15", PROCESSOR_ARCH15, &zEC12_cost, 15 },
+ { "z17", "arch15", PROCESSOR_9175_Z17, &zEC12_cost, 15 },
{ "native", "", PROCESSOR_NATIVE, NULL, 0 }
};
@@ -916,7 +916,7 @@ s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if ((bflags & B_VXE3) && !TARGET_VXE3)
{
- error ("Builtin %qF requires arch15 or higher", fndecl);
+ error ("Builtin %qF requires z17 or higher", fndecl);
return const0_rtx;
}
}
@@ -5589,8 +5589,7 @@ legitimize_tls_address (rtx addr, rtx reg)
new_rtx = force_const_mem (Pmode, new_rtx);
emit_move_insn (r2, new_rtx);
s390_emit_tls_call_insn (r2, tls_call);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
temp = gen_reg_rtx (Pmode);
@@ -5612,8 +5611,7 @@ legitimize_tls_address (rtx addr, rtx reg)
new_rtx = force_const_mem (Pmode, new_rtx);
emit_move_insn (r2, new_rtx);
s390_emit_tls_call_insn (r2, tls_call);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
temp = gen_reg_rtx (Pmode);
@@ -7210,6 +7208,82 @@ s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
NULL_RTX, 1, OPTAB_DIRECT);
}
+/* Expand optab cstoreti4. */
+
+void
+s390_expand_cstoreti4 (rtx dst, rtx cmp, rtx op1, rtx op2)
+{
+ rtx_code code = GET_CODE (cmp);
+
+ if (TARGET_VXE3)
+ {
+ rtx cond = s390_emit_compare (GET_MODE (cmp), code, op1, op2);
+ emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx));
+ return;
+ }
+
+ /* Prior to VXE3, emulate the comparison. For an (in)equality test exploit
+ VECTOR COMPARE EQUAL. For a relational test, first compare the high parts
+ via VECTOR ELEMENT COMPARE (LOGICAL). If the high parts are not equal,
+ then consume the CC immediately by a subsequent LOAD ON CONDITION.
+ Otherwise, if the high parts are equal, then perform a subsequent VECTOR
+ COMPARE HIGH LOGICAL followed by a LOAD ON CONDITION. */
+
+ op1 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0));
+ op2 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0));
+
+ if (code == EQ || code == NE)
+ {
+ s390_expand_vec_compare_cc (dst, code, op1, op2, code == EQ);
+ return;
+ }
+
+ /* Normalize code into either GE(U) or GT(U). */
+ if (code == LT || code == LE || code == LTU || code == LEU)
+ {
+ std::swap (op1, op2);
+ code = swap_condition (code);
+ }
+
+ /* For (un)signed comparisons
+ - high(op1) >= high(op2) instruction VECG op1, op2 sets CC1
+ if the relation does _not_ hold.
+ - high(op1) > high(op2) instruction VECG op2, op1 sets CC1
+ if the relation holds. */
+ if (code == GT || code == GTU)
+ std::swap (op1, op2);
+ machine_mode cc_mode = (code == GEU || code == GTU) ? CCUmode : CCSmode;
+ rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+ emit_insn (
+ gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
+ gen_rtx_COMPARE (cc_mode,
+ gen_rtx_VEC_SELECT (DImode, op1, lane0),
+ gen_rtx_VEC_SELECT (DImode, op2, lane0))));
+ rtx ccs_reg = gen_rtx_REG (CCSmode, CC_REGNUM);
+ rtx lab = gen_label_rtx ();
+ s390_emit_jump (lab, gen_rtx_NE (VOIDmode, ccs_reg, const0_rtx));
+ /* At this point we know that high(op1) == high(op2). Thus, test the low
+ part now. For unsigned comparisons
+ - low(op1) >= low(op2) instruction VCHLGS op2, op1 sets CC1
+ if the relation does _not_ hold.
+ - low(op1) > low(op2) instruction VCHLGS op1, op2 sets CC1
+ if the relation holds. */
+ std::swap (op1, op2);
+ emit_insn (gen_rtx_PARALLEL (
+ VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM),
+ gen_rtx_COMPARE (CCVIHUmode, op1, op2)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)))));
+ emit_label (lab);
+ /* For an (un)signed comparison >=, any CC except CC1 means that the
+ relation holds. For an (un)signed comparison >, only CC1 means that the
+ relation holds. */
+ rtx_code cmp_code = (code == GE || code == GEU) ? UNGE : LT;
+ rtx cond = gen_rtx_fmt_ee (cmp_code, CCSmode, ccs_reg, const0_rtx);
+ emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx));
+}
+
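For reference, the following standalone C++ sketch (not part of the patch; the
function names are hypothetical) mirrors how the emulation path above decomposes
a 128-bit relational test into 64-bit halves: the high parts decide unless they
are equal, in which case the low parts are compared as unsigned quantities.

#include <cstdint>

/* Unsigned 128-bit op1 >= op2, built from 64-bit halves.  */
static bool
cmp_geu_128 (uint64_t hi1, uint64_t lo1, uint64_t hi2, uint64_t lo2)
{
  if (hi1 != hi2)       /* High-part compare (VECTOR ELEMENT COMPARE) ...  */
    return hi1 > hi2;   /* ... whose CC is consumed by LOAD ON CONDITION.  */
  return lo1 >= lo2;    /* High parts equal: compare low parts, always unsigned.  */
}

/* Signed variant: only the high-part comparison is signed.  */
static bool
cmp_ge_128 (int64_t hi1, uint64_t lo1, int64_t hi2, uint64_t lo2)
{
  if (hi1 != hi2)
    return hi1 > hi2;
  return lo1 >= lo2;
}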
/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
the result in TARGET. */
@@ -7310,9 +7384,9 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
elements in CMP1 and CMP2 fulfill the comparison.
- This function is only used to emit patterns for the vx builtins and
- therefore only handles comparison codes required by the
- builtins. */
+ This function is only used in s390_expand_cstoreti4 and to emit patterns for
+ the vx builtins and therefore only handles comparison codes required by
+ those. */
void
s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
rtx cmp1, rtx cmp2, bool all_p)
@@ -7793,8 +7867,7 @@ s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
const0_rtx, ins))
{
*seq1 = NULL;
- *seq2 = get_insns ();
- end_sequence ();
+ *seq2 = end_sequence ();
return tmp;
}
end_sequence ();
@@ -7803,13 +7876,11 @@ s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
/* Failed to use insv. Generate a two part shift and mask. */
start_sequence ();
tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
- *seq1 = get_insns ();
- end_sequence ();
+ *seq1 = end_sequence ();
start_sequence ();
tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
- *seq2 = get_insns ();
- end_sequence ();
+ *seq2 = end_sequence ();
return tmp;
}
@@ -9204,7 +9275,7 @@ s390_issue_rate (void)
case PROCESSOR_3906_Z14:
case PROCESSOR_8561_Z15:
case PROCESSOR_3931_Z16:
- case PROCESSOR_ARCH15:
+ case PROCESSOR_9175_Z17:
default:
return 1;
}
@@ -11735,8 +11806,7 @@ s390_load_got (void)
emit_move_insn (got_rtx, s390_got_symbol ());
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
return insns;
}
@@ -13503,8 +13573,7 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
start_sequence ();
emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
push_topmost_sequence ();
emit_insn_after (seq, entry_of_function ());
@@ -14496,7 +14565,21 @@ s390_call_saved_register_used (tree call_expr)
for (reg = 0; reg < nregs; reg++)
if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
- return true;
+ {
+ rtx parm;
+ /* Allow passing through unmodified value from caller,
+ see PR119873. */
+ if (TREE_CODE (parameter) == SSA_NAME
+ && SSA_NAME_IS_DEFAULT_DEF (parameter)
+ && SSA_NAME_VAR (parameter)
+ && TREE_CODE (SSA_NAME_VAR (parameter)) == PARM_DECL
+ && (parm = DECL_INCOMING_RTL (SSA_NAME_VAR (parameter)))
+ && REG_P (parm)
+ && REGNO (parm) == REGNO (parm_rtx)
+ && REG_NREGS (parm) == REG_NREGS (parm_rtx))
+ break;
+ return true;
+ }
}
else if (GET_CODE (parm_rtx) == PARALLEL)
{
@@ -14510,7 +14593,17 @@ s390_call_saved_register_used (tree call_expr)
gcc_assert (REG_NREGS (r) == 1);
if (!call_used_or_fixed_reg_p (REGNO (r)))
- return true;
+ {
+ rtx parm;
+ if (TREE_CODE (parameter) == SSA_NAME
+ && SSA_NAME_IS_DEFAULT_DEF (parameter)
+ && SSA_NAME_VAR (parameter)
+ && TREE_CODE (SSA_NAME_VAR (parameter)) == PARM_DECL
+ && (parm = DECL_INCOMING_RTL (SSA_NAME_VAR (parameter)))
+ && rtx_equal_p (parm_rtx, parm))
+ break;
+ return true;
+ }
}
}
}
@@ -14543,8 +14636,9 @@ s390_function_ok_for_sibcall (tree decl, tree exp)
return false;
/* Register 6 on s390 is available as an argument register but unfortunately
- "caller saved". This makes functions needing this register for arguments
- not suitable for sibcalls. */
+ "caller saved". This makes functions needing this register for arguments
+ not suitable for sibcalls, unless the same value is passed from the
+ caller. */
return !s390_call_saved_register_used (exp);
}
@@ -15632,7 +15726,6 @@ s390_get_sched_attrmask (rtx_insn *insn)
mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
break;
case PROCESSOR_3931_Z16:
- case PROCESSOR_ARCH15:
if (get_attr_z16_cracked (insn))
mask |= S390_SCHED_ATTR_MASK_CRACKED;
if (get_attr_z16_expanded (insn))
@@ -15644,6 +15737,18 @@ s390_get_sched_attrmask (rtx_insn *insn)
if (get_attr_z16_groupoftwo (insn))
mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
break;
+ case PROCESSOR_9175_Z17:
+ if (get_attr_z17_cracked (insn))
+ mask |= S390_SCHED_ATTR_MASK_CRACKED;
+ if (get_attr_z17_expanded (insn))
+ mask |= S390_SCHED_ATTR_MASK_EXPANDED;
+ if (get_attr_z17_endgroup (insn))
+ mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
+ if (get_attr_z17_groupalone (insn))
+ mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
+ if (get_attr_z17_groupoftwo (insn))
+ mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
+ break;
default:
gcc_unreachable ();
}
@@ -15691,7 +15796,6 @@ s390_get_unit_mask (rtx_insn *insn, int *units)
mask |= 1 << 3;
break;
case PROCESSOR_3931_Z16:
- case PROCESSOR_ARCH15:
*units = 4;
if (get_attr_z16_unit_lsu (insn))
mask |= 1 << 0;
@@ -15702,6 +15806,17 @@ s390_get_unit_mask (rtx_insn *insn, int *units)
if (get_attr_z16_unit_vfu (insn))
mask |= 1 << 3;
break;
+ case PROCESSOR_9175_Z17:
+ *units = 4;
+ if (get_attr_z17_unit_lsu (insn))
+ mask |= 1 << 0;
+ if (get_attr_z17_unit_fxa (insn))
+ mask |= 1 << 1;
+ if (get_attr_z17_unit_fxb (insn))
+ mask |= 1 << 2;
+ if (get_attr_z17_unit_vfu (insn))
+ mask |= 1 << 3;
+ break;
default:
gcc_unreachable ();
}
@@ -15715,7 +15830,8 @@ s390_is_fpd (rtx_insn *insn)
return false;
return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
- || get_attr_z15_unit_fpd (insn) || get_attr_z16_unit_fpd (insn);
+ || get_attr_z15_unit_fpd (insn) || get_attr_z16_unit_fpd (insn)
+ || get_attr_z17_unit_fpd (insn);
}
static bool
@@ -15725,7 +15841,8 @@ s390_is_fxd (rtx_insn *insn)
return false;
return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
- || get_attr_z15_unit_fxd (insn) || get_attr_z16_unit_fxd (insn);
+ || get_attr_z15_unit_fxd (insn) || get_attr_z16_unit_fxd (insn)
+ || get_attr_z17_unit_fxd (insn);
}
/* Returns TRUE if INSN is a long-running instruction. */
@@ -17924,9 +18041,34 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d)
static const unsigned char lo_perm_qi_swap[16]
= {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15};
+ static const unsigned char hi_perm_qi_di[16]
+ = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
+ static const unsigned char hi_perm_qi_si[16]
+ = {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
+ static const unsigned char hi_perm_qi_hi[16]
+ = {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23};
+
+ static const unsigned char lo_perm_qi_di[16]
+ = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
+ static const unsigned char lo_perm_qi_si[16]
+ = {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
+ static const unsigned char lo_perm_qi_hi[16]
+ = {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31};
+
+ static const unsigned char hi_perm_hi_si[8] = {0, 1, 8, 9, 2, 3, 10, 11};
+ static const unsigned char hi_perm_hi_di[8] = {0, 1, 2, 3, 8, 9, 10, 11};
+
+ static const unsigned char lo_perm_hi_si[8] = {4, 5, 12, 13, 6, 7, 14, 15};
+ static const unsigned char lo_perm_hi_di[8] = {4, 5, 6, 7, 12, 13, 14, 15};
+
+ static const unsigned char hi_perm_si_di[4] = {0, 1, 4, 5};
+
+ static const unsigned char lo_perm_si_di[4] = {2, 3, 6, 7};
+
bool merge_lo_p = false;
bool merge_hi_p = false;
bool swap_operands_p = false;
+ machine_mode mergemode = d.vmode;
if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di, 2) == 0)
|| (d.nelt == 4 && memcmp (d.perm, hi_perm_si, 4) == 0)
@@ -17958,6 +18100,75 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d)
merge_lo_p = true;
swap_operands_p = true;
}
+ else if (d.nelt == 16)
+ {
+ if (memcmp (d.perm, hi_perm_qi_di, 16) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, hi_perm_qi_si, 16) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V4SImode;
+ }
+ else if (memcmp (d.perm, hi_perm_qi_hi, 16) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V8HImode;
+ }
+ else if (memcmp (d.perm, lo_perm_qi_di, 16) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, lo_perm_qi_si, 16) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V4SImode;
+ }
+ else if (memcmp (d.perm, lo_perm_qi_hi, 16) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V8HImode;
+ }
+ }
+ else if (d.nelt == 8)
+ {
+ if (memcmp (d.perm, hi_perm_hi_di, 8) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, hi_perm_hi_si, 8) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V4SImode;
+ }
+ else if (memcmp (d.perm, lo_perm_hi_di, 8) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, lo_perm_hi_si, 8) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V4SImode;
+ }
+ }
+ else if (d.nelt == 4)
+ {
+ if (memcmp (d.perm, hi_perm_si_di, 4) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, lo_perm_si_di, 4) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V2DImode;
+ }
+ }
if (!merge_lo_p && !merge_hi_p)
return false;
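The new tables are simply the byte-level images of a merge high/low carried out
at a wider element width. As an illustration (not part of the patch), the
following standalone C++ program prints those selection patterns for a given
element size in bytes; elemsize 8/4/2 reproduces the *_qi_di, *_qi_si and
*_qi_hi tables above:

#include <cstdio>

static void
print_merge_pattern (unsigned elemsize, bool high)
{
  const unsigned vecsize = 16;              /* two 16-byte operands: bytes 0..15 and 16..31 */
  unsigned base = high ? 0 : vecsize / 2;   /* take the high or the low half of each operand */
  for (unsigned e = 0; e < vecsize / 2 / elemsize; e++)
    for (unsigned op = 0; op < 2; op++)     /* interleave op0 and op1 elements */
      for (unsigned b = 0; b < elemsize; b++)
        printf ("%u ", op * vecsize + base + e * elemsize + b);
  printf ("\n");
}

int
main ()
{
  print_merge_pattern (8, true);   /* 0 1 2 3 4 5 6 7 16 17 ... 23  == hi_perm_qi_di */
  print_merge_pattern (4, true);   /* 0 1 2 3 16 17 18 19 4 5 ...   == hi_perm_qi_si */
  print_merge_pattern (8, false);  /* 8 ... 15 24 ... 31            == lo_perm_qi_di */
  return 0;
}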
@@ -17965,7 +18176,7 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d)
if (d.testing_p)
return merge_lo_p || merge_hi_p;
- rtx op0, op1;
+ rtx op0, op1, target = d.target;
if (swap_operands_p)
{
op0 = d.op1;
@@ -17976,12 +18187,80 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d)
op0 = d.op0;
op1 = d.op1;
}
+ if (mergemode != d.vmode)
+ {
+ target = simplify_gen_subreg (mergemode, target, d.vmode, 0);
+ op0 = simplify_gen_subreg (mergemode, op0, d.vmode, 0);
+ op1 = simplify_gen_subreg (mergemode, op1, d.vmode, 0);
+ }
- s390_expand_merge (d.target, op0, op1, merge_hi_p);
+ s390_expand_merge (target, op0, op1, merge_hi_p);
return true;
}
+/* Try to expand the vector permute operation described by D using the vector
+ pack instruction vpk. Return true if vector pack could be used. */
+static bool
+expand_perm_with_pack (const struct expand_vec_perm_d &d)
+{
+ static const unsigned char qi_hi[16]
+ = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
+ static const unsigned char qi_si[16]
+ = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31};
+ static const unsigned char qi_di[16]
+ = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31};
+
+ static const unsigned char hi_si[8]
+ = {1, 3, 5, 7, 9, 11, 13, 15};
+ static const unsigned char hi_di[8]
+ = {2, 3, 6, 7, 10, 11, 14, 15};
+
+ static const unsigned char si_di[4]
+ = {1, 3, 5, 7};
+
+ machine_mode packmode, resmode;
+ enum insn_code code = CODE_FOR_nothing;
+
+ if (d.nelt == 16 && memcmp (d.perm, qi_hi, 16) == 0)
+ {
+ packmode = E_V8HImode;
+ resmode = E_V16QImode;
+ code = CODE_FOR_vec_pack_trunc_v8hi;
+ }
+ else if ((d.nelt == 16 && memcmp (d.perm, qi_si, 16) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, hi_si, 8) == 0))
+ {
+ packmode = E_V4SImode;
+ resmode = E_V8HImode;
+ code = CODE_FOR_vec_pack_trunc_v4si;
+ }
+ else if ((d.nelt == 16 && memcmp (d.perm, qi_di, 16) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, hi_di, 8) == 0)
+ || (d.nelt == 4 && memcmp (d.perm, si_di, 4) == 0))
+ {
+ packmode = E_V2DImode;
+ resmode = E_V4SImode;
+ code = CODE_FOR_vec_pack_trunc_v2di;
+ }
+
+ if (code == CODE_FOR_nothing)
+ return false;
+
+ if (d.testing_p)
+ return true;
+ rtx target = simplify_gen_subreg (resmode, d.target, d.vmode, 0);
+ rtx op0 = simplify_gen_subreg (packmode,
+ force_reg (GET_MODE (d.op0), d.op0),
+ d.vmode, 0);
+ rtx op1 = simplify_gen_subreg (packmode,
+ force_reg (GET_MODE (d.op1), d.op1),
+ d.vmode, 0);
+ rtx pat = GEN_FCN (code) (target, op0, op1);
+ emit_insn (pat);
+ return true;
+}
+
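The patterns above are the byte selections of a big-endian pack, i.e. keeping
the low-order half of every source element, first from op0 and then from op1.
A small standalone C++ check (illustration only, not part of the patch):

#include <cstdio>

static void
print_pack_pattern (unsigned srcsize)        /* source element size in bytes */
{
  for (unsigned op = 0; op < 2; op++)        /* op0 bytes 0..15, op1 bytes 16..31 */
    for (unsigned e = 0; e < 16 / srcsize; e++)
      for (unsigned b = srcsize / 2; b < srcsize; b++)  /* low half (big endian) */
        printf ("%u ", op * 16 + e * srcsize + b);
  printf ("\n");
}

int
main ()
{
  print_pack_pattern (2);   /* 1 3 5 ... 31        == qi_hi */
  print_pack_pattern (4);   /* 2 3 6 7 10 11 ...   == qi_si */
  print_pack_pattern (8);   /* 4 5 6 7 12 13 ...   == qi_di */
  return 0;
}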
/* Try to expand the vector permute operation described by D using the
vector permute doubleword immediate instruction vpdi. Return true
if vpdi could be used.
@@ -18205,6 +18484,9 @@ vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
if (expand_perm_with_merge (d))
return true;
+ if (expand_perm_with_pack (d))
+ return true;
+
if (expand_perm_with_vpdi (d))
return true;