diff options
Diffstat (limited to 'gcc/config/s390/s390.cc')
-rw-r--r-- | gcc/config/s390/s390.cc | 342 |
1 files changed, 312 insertions, 30 deletions
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 0ff3fd5..de9c15c 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -342,7 +342,7 @@ const struct s390_processor processor_table[] = { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 }, { "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 }, { "z16", "arch14", PROCESSOR_3931_Z16, &zEC12_cost, 14 }, - { "arch15", "arch15", PROCESSOR_ARCH15, &zEC12_cost, 15 }, + { "z17", "arch15", PROCESSOR_9175_Z17, &zEC12_cost, 15 }, { "native", "", PROCESSOR_NATIVE, NULL, 0 } }; @@ -916,7 +916,7 @@ s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, if ((bflags & B_VXE3) && !TARGET_VXE3) { - error ("Builtin %qF requires arch15 or higher", fndecl); + error ("Builtin %qF requires z17 or higher", fndecl); return const0_rtx; } } @@ -5589,8 +5589,7 @@ legitimize_tls_address (rtx addr, rtx reg) new_rtx = force_const_mem (Pmode, new_rtx); emit_move_insn (r2, new_rtx); s390_emit_tls_call_insn (r2, tls_call); - insn = get_insns (); - end_sequence (); + insn = end_sequence (); new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF); temp = gen_reg_rtx (Pmode); @@ -5612,8 +5611,7 @@ legitimize_tls_address (rtx addr, rtx reg) new_rtx = force_const_mem (Pmode, new_rtx); emit_move_insn (r2, new_rtx); s390_emit_tls_call_insn (r2, tls_call); - insn = get_insns (); - end_sequence (); + insn = end_sequence (); new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF); temp = gen_reg_rtx (Pmode); @@ -7210,6 +7208,82 @@ s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count) NULL_RTX, 1, OPTAB_DIRECT); } +/* Expand optab cstoreti4. */ + +void +s390_expand_cstoreti4 (rtx dst, rtx cmp, rtx op1, rtx op2) +{ + rtx_code code = GET_CODE (cmp); + + if (TARGET_VXE3) + { + rtx cond = s390_emit_compare (GET_MODE (cmp), code, op1, op2); + emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx)); + return; + } + + /* Prior VXE3 emulate the comparison. For an (in)equality test exploit + VECTOR COMPARE EQUAL. For a relational test, first compare the high part + via VECTOR ELEMENT COMPARE (LOGICAL). If the high part does not equal, + then consume the CC immediatelly by a subsequent LOAD ON CONDITION. + Otherweise, if the high part equals, then perform a subsequent VECTOR + COMPARE HIGH LOGICAL followed by a LOAD ON CONDITION. */ + + op1 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0)); + op2 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0)); + + if (code == EQ || code == NE) + { + s390_expand_vec_compare_cc (dst, code, op1, op2, code == EQ); + return; + } + + /* Normalize code into either GE(U) or GT(U). */ + if (code == LT || code == LE || code == LTU || code == LEU) + { + std::swap (op1, op2); + code = swap_condition (code); + } + + /* For (un)signed comparisons + - high(op1) >= high(op2) instruction VECG op1, op2 sets CC1 + if the relation does _not_ hold. + - high(op1) > high(op2) instruction VECG op2, op1 sets CC1 + if the relation holds. */ + if (code == GT || code == GTU) + std::swap (op1, op2); + machine_mode cc_mode = (code == GEU || code == GTU) ? CCUmode : CCSmode; + rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); + emit_insn ( + gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM), + gen_rtx_COMPARE (cc_mode, + gen_rtx_VEC_SELECT (DImode, op1, lane0), + gen_rtx_VEC_SELECT (DImode, op2, lane0)))); + rtx ccs_reg = gen_rtx_REG (CCSmode, CC_REGNUM); + rtx lab = gen_label_rtx (); + s390_emit_jump (lab, gen_rtx_NE (VOIDmode, ccs_reg, const0_rtx)); + /* At this point we have that high(op1) == high(op2). Thus, test the low + part, now. For unsigned comparisons + - low(op1) >= low(op2) instruction VCHLGS op2, op1 sets CC1 + if the relation does _not_ hold. + - low(op1) > low(op2) instruction VCHLGS op1, op2 sets CC1 + if the relation holds. */ + std::swap (op1, op2); + emit_insn (gen_rtx_PARALLEL ( + VOIDmode, + gen_rtvec (2, + gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM), + gen_rtx_COMPARE (CCVIHUmode, op1, op2)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode))))); + emit_label (lab); + /* For (un)signed comparison >= any CC except CC1 means that the relation + holds. For (un)signed comparison > only CC1 means that the relation + holds. */ + rtx_code cmp_code = (code == GE || code == GEU) ? UNGE : LT; + rtx cond = gen_rtx_fmt_ee (cmp_code, CCSmode, ccs_reg, const0_rtx); + emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx)); +} + /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store the result in TARGET. */ @@ -7310,9 +7384,9 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond, /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into TARGET if either all (ALL_P is true) or any (ALL_P is false) of the elements in CMP1 and CMP2 fulfill the comparison. - This function is only used to emit patterns for the vx builtins and - therefore only handles comparison codes required by the - builtins. */ + This function is only used in s390_expand_cstoreti4 and to emit patterns for + the vx builtins and therefore only handles comparison codes required by + those. */ void s390_expand_vec_compare_cc (rtx target, enum rtx_code code, rtx cmp1, rtx cmp2, bool all_p) @@ -7793,8 +7867,7 @@ s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2, const0_rtx, ins)) { *seq1 = NULL; - *seq2 = get_insns (); - end_sequence (); + *seq2 = end_sequence (); return tmp; } end_sequence (); @@ -7803,13 +7876,11 @@ s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2, /* Failed to use insv. Generate a two part shift and mask. */ start_sequence (); tmp = s390_expand_mask_and_shift (ins, mode, ac->shift); - *seq1 = get_insns (); - end_sequence (); + *seq1 = end_sequence (); start_sequence (); tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT); - *seq2 = get_insns (); - end_sequence (); + *seq2 = end_sequence (); return tmp; } @@ -9204,7 +9275,7 @@ s390_issue_rate (void) case PROCESSOR_3906_Z14: case PROCESSOR_8561_Z15: case PROCESSOR_3931_Z16: - case PROCESSOR_ARCH15: + case PROCESSOR_9175_Z17: default: return 1; } @@ -11735,8 +11806,7 @@ s390_load_got (void) emit_move_insn (got_rtx, s390_got_symbol ()); - insns = get_insns (); - end_sequence (); + insns = end_sequence (); return insns; } @@ -13503,8 +13573,7 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) start_sequence (); emit_move_insn (reg, gen_rtx_REG (Pmode, 1)); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); push_topmost_sequence (); emit_insn_after (seq, entry_of_function ()); @@ -14496,7 +14565,21 @@ s390_call_saved_register_used (tree call_expr) for (reg = 0; reg < nregs; reg++) if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx))) - return true; + { + rtx parm; + /* Allow passing through unmodified value from caller, + see PR119873. */ + if (TREE_CODE (parameter) == SSA_NAME + && SSA_NAME_IS_DEFAULT_DEF (parameter) + && SSA_NAME_VAR (parameter) + && TREE_CODE (SSA_NAME_VAR (parameter)) == PARM_DECL + && (parm = DECL_INCOMING_RTL (SSA_NAME_VAR (parameter))) + && REG_P (parm) + && REGNO (parm) == REGNO (parm_rtx) + && REG_NREGS (parm) == REG_NREGS (parm_rtx)) + break; + return true; + } } else if (GET_CODE (parm_rtx) == PARALLEL) { @@ -14510,7 +14593,17 @@ s390_call_saved_register_used (tree call_expr) gcc_assert (REG_NREGS (r) == 1); if (!call_used_or_fixed_reg_p (REGNO (r))) - return true; + { + rtx parm; + if (TREE_CODE (parameter) == SSA_NAME + && SSA_NAME_IS_DEFAULT_DEF (parameter) + && SSA_NAME_VAR (parameter) + && TREE_CODE (SSA_NAME_VAR (parameter)) == PARM_DECL + && (parm = DECL_INCOMING_RTL (SSA_NAME_VAR (parameter))) + && rtx_equal_p (parm_rtx, parm)) + break; + return true; + } } } } @@ -14543,8 +14636,9 @@ s390_function_ok_for_sibcall (tree decl, tree exp) return false; /* Register 6 on s390 is available as an argument register but unfortunately - "caller saved". This makes functions needing this register for arguments - not suitable for sibcalls. */ + "caller saved". This makes functions needing this register for arguments + not suitable for sibcalls, unless the same value is passed from the + caller. */ return !s390_call_saved_register_used (exp); } @@ -15632,7 +15726,6 @@ s390_get_sched_attrmask (rtx_insn *insn) mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO; break; case PROCESSOR_3931_Z16: - case PROCESSOR_ARCH15: if (get_attr_z16_cracked (insn)) mask |= S390_SCHED_ATTR_MASK_CRACKED; if (get_attr_z16_expanded (insn)) @@ -15644,6 +15737,18 @@ s390_get_sched_attrmask (rtx_insn *insn) if (get_attr_z16_groupoftwo (insn)) mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO; break; + case PROCESSOR_9175_Z17: + if (get_attr_z17_cracked (insn)) + mask |= S390_SCHED_ATTR_MASK_CRACKED; + if (get_attr_z17_expanded (insn)) + mask |= S390_SCHED_ATTR_MASK_EXPANDED; + if (get_attr_z17_endgroup (insn)) + mask |= S390_SCHED_ATTR_MASK_ENDGROUP; + if (get_attr_z17_groupalone (insn)) + mask |= S390_SCHED_ATTR_MASK_GROUPALONE; + if (get_attr_z17_groupoftwo (insn)) + mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO; + break; default: gcc_unreachable (); } @@ -15691,7 +15796,6 @@ s390_get_unit_mask (rtx_insn *insn, int *units) mask |= 1 << 3; break; case PROCESSOR_3931_Z16: - case PROCESSOR_ARCH15: *units = 4; if (get_attr_z16_unit_lsu (insn)) mask |= 1 << 0; @@ -15702,6 +15806,17 @@ s390_get_unit_mask (rtx_insn *insn, int *units) if (get_attr_z16_unit_vfu (insn)) mask |= 1 << 3; break; + case PROCESSOR_9175_Z17: + *units = 4; + if (get_attr_z17_unit_lsu (insn)) + mask |= 1 << 0; + if (get_attr_z17_unit_fxa (insn)) + mask |= 1 << 1; + if (get_attr_z17_unit_fxb (insn)) + mask |= 1 << 2; + if (get_attr_z17_unit_vfu (insn)) + mask |= 1 << 3; + break; default: gcc_unreachable (); } @@ -15715,7 +15830,8 @@ s390_is_fpd (rtx_insn *insn) return false; return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn) - || get_attr_z15_unit_fpd (insn) || get_attr_z16_unit_fpd (insn); + || get_attr_z15_unit_fpd (insn) || get_attr_z16_unit_fpd (insn) + || get_attr_z17_unit_fpd (insn); } static bool @@ -15725,7 +15841,8 @@ s390_is_fxd (rtx_insn *insn) return false; return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn) - || get_attr_z15_unit_fxd (insn) || get_attr_z16_unit_fxd (insn); + || get_attr_z15_unit_fxd (insn) || get_attr_z16_unit_fxd (insn) + || get_attr_z17_unit_fxd (insn); } /* Returns TRUE if INSN is a long-running instruction. */ @@ -17924,9 +18041,34 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d) static const unsigned char lo_perm_qi_swap[16] = {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}; + static const unsigned char hi_perm_qi_di[16] + = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}; + static const unsigned char hi_perm_qi_si[16] + = {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}; + static const unsigned char hi_perm_qi_hi[16] + = {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}; + + static const unsigned char lo_perm_qi_di[16] + = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}; + static const unsigned char lo_perm_qi_si[16] + = {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}; + static const unsigned char lo_perm_qi_hi[16] + = {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}; + + static const unsigned char hi_perm_hi_si[8] = {0, 1, 8, 9, 2, 3, 10, 11}; + static const unsigned char hi_perm_hi_di[8] = {0, 1, 2, 3, 8, 9, 10, 11}; + + static const unsigned char lo_perm_hi_si[8] = {4, 5, 12, 13, 6, 7, 14, 15}; + static const unsigned char lo_perm_hi_di[8] = {4, 5, 6, 7, 12, 13, 14, 15}; + + static const unsigned char hi_perm_si_di[4] = {0, 1, 4, 5}; + + static const unsigned char lo_perm_si_di[4] = {2, 3, 6, 7}; + bool merge_lo_p = false; bool merge_hi_p = false; bool swap_operands_p = false; + machine_mode mergemode = d.vmode; if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di, 2) == 0) || (d.nelt == 4 && memcmp (d.perm, hi_perm_si, 4) == 0) @@ -17958,6 +18100,75 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d) merge_lo_p = true; swap_operands_p = true; } + else if (d.nelt == 16) + { + if (memcmp (d.perm, hi_perm_qi_di, 16) == 0) + { + merge_hi_p = true; + mergemode = E_V2DImode; + } + else if (memcmp (d.perm, hi_perm_qi_si, 16) == 0) + { + merge_hi_p = true; + mergemode = E_V4SImode; + } + else if (memcmp (d.perm, hi_perm_qi_hi, 16) == 0) + { + merge_hi_p = true; + mergemode = E_V8HImode; + } + else if (memcmp (d.perm, lo_perm_qi_di, 16) == 0) + { + merge_lo_p = true; + mergemode = E_V2DImode; + } + else if (memcmp (d.perm, lo_perm_qi_si, 16) == 0) + { + merge_lo_p = true; + mergemode = E_V4SImode; + } + else if (memcmp (d.perm, lo_perm_qi_hi, 16) == 0) + { + merge_lo_p = true; + mergemode = E_V8HImode; + } + } + else if (d.nelt == 8) + { + if (memcmp (d.perm, hi_perm_hi_di, 8) == 0) + { + merge_hi_p = true; + mergemode = E_V2DImode; + } + else if (memcmp (d.perm, hi_perm_hi_si, 8) == 0) + { + merge_hi_p = true; + mergemode = E_V4SImode; + } + else if (memcmp (d.perm, lo_perm_hi_di, 8) == 0) + { + merge_lo_p = true; + mergemode = E_V2DImode; + } + else if (memcmp (d.perm, lo_perm_hi_si, 8) == 0) + { + merge_lo_p = true; + mergemode = E_V4SImode; + } + } + else if (d.nelt == 4) + { + if (memcmp (d.perm, hi_perm_si_di, 4) == 0) + { + merge_hi_p = true; + mergemode = E_V2DImode; + } + else if (memcmp (d.perm, lo_perm_si_di, 4) == 0) + { + merge_lo_p = true; + mergemode = E_V2DImode; + } + } if (!merge_lo_p && !merge_hi_p) return false; @@ -17965,7 +18176,7 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d) if (d.testing_p) return merge_lo_p || merge_hi_p; - rtx op0, op1; + rtx op0, op1, target = d.target; if (swap_operands_p) { op0 = d.op1; @@ -17976,12 +18187,80 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d) op0 = d.op0; op1 = d.op1; } + if (mergemode != d.vmode) + { + target = simplify_gen_subreg (mergemode, target, d.vmode, 0); + op0 = simplify_gen_subreg (mergemode, op0, d.vmode, 0); + op1 = simplify_gen_subreg (mergemode, op1, d.vmode, 0); + } - s390_expand_merge (d.target, op0, op1, merge_hi_p); + s390_expand_merge (target, op0, op1, merge_hi_p); return true; } +/* Try to expand the vector permute operation described by D using the vector + pack instruction vpk. Return true if vector pack could be used. */ +static bool +expand_perm_with_pack (const struct expand_vec_perm_d &d) +{ + static const unsigned char qi_hi[16] + = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; + static const unsigned char qi_si[16] + = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}; + static const unsigned char qi_di[16] + = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}; + + static const unsigned char hi_si[8] + = {1, 3, 5, 7, 9, 11, 13, 15}; + static const unsigned char hi_di[8] + = {2, 3, 6, 7, 10, 11, 14, 15}; + + static const unsigned char si_di[4] + = {1, 3, 5, 7}; + + machine_mode packmode, resmode; + enum insn_code code = CODE_FOR_nothing; + + if (d.nelt == 16 && memcmp (d.perm, qi_hi, 16) == 0) + { + packmode = E_V8HImode; + resmode = E_V16QImode; + code = CODE_FOR_vec_pack_trunc_v8hi; + } + else if ((d.nelt == 16 && memcmp (d.perm, qi_si, 16) == 0) + || (d.nelt == 8 && memcmp (d.perm, hi_si, 8) == 0)) + { + packmode = E_V4SImode; + resmode = E_V8HImode; + code = CODE_FOR_vec_pack_trunc_v4si; + } + else if ((d.nelt == 16 && memcmp (d.perm, qi_di, 16) == 0) + || (d.nelt == 8 && memcmp (d.perm, hi_di, 8) == 0) + || (d.nelt == 4 && memcmp (d.perm, si_di, 4) == 0)) + { + packmode = E_V2DImode; + resmode = E_V4SImode; + code = CODE_FOR_vec_pack_trunc_v2di; + } + + if (code == CODE_FOR_nothing) + return false; + + if (d.testing_p) + return true; + rtx target = simplify_gen_subreg (resmode, d.target, d.vmode, 0); + rtx op0 = simplify_gen_subreg (packmode, + force_reg (GET_MODE (d.op0), d.op0), + d.vmode, 0); + rtx op1 = simplify_gen_subreg (packmode, + force_reg (GET_MODE (d.op1), d.op1), + d.vmode, 0); + rtx pat = GEN_FCN (code) (target, op0, op1); + emit_insn (pat); + return true; +} + /* Try to expand the vector permute operation described by D using the vector permute doubleword immediate instruction vpdi. Return true if vpdi could be used. @@ -18205,6 +18484,9 @@ vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d) if (expand_perm_with_merge (d)) return true; + if (expand_perm_with_pack (d)) + return true; + if (expand_perm_with_vpdi (d)) return true; |