diff options
author | Ian Lance Taylor <iant@golang.org> | 2021-03-11 16:12:22 -0800 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2021-03-11 16:12:22 -0800 |
commit | bc636c218f2b28da06cd1404d5b35d1f8cc43fd1 (patch) | |
tree | 764937d8460563db6132d7c75e19b95ef3ea6ea8 /gcc/config | |
parent | 89d7be42db00cd0953e7d4584877cf50a56ed046 (diff) | |
parent | 7ad5a72c8bc6aa71a0d195ddfa207db01265fe0b (diff) | |
download | gcc-bc636c218f2b28da06cd1404d5b35d1f8cc43fd1.zip gcc-bc636c218f2b28da06cd1404d5b35d1f8cc43fd1.tar.gz gcc-bc636c218f2b28da06cd1404d5b35d1f8cc43fd1.tar.bz2 |
Merge from trunk revision 7ad5a72c8bc6aa71a0d195ddfa207db01265fe0b.
Diffstat (limited to 'gcc/config')
74 files changed, 3063 insertions, 752 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 71aa77d..348a43d 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1738,7 +1738,7 @@ (vec_concat:<VNARROWQ2> (truncate:<VNARROWQ> (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w") - (match_operand:VQN 2 "aarch64_simd_rshift_imm"))) + (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))) (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))] "TARGET_SIMD && !BYTES_BIG_ENDIAN" "shrn\\t%0.<Vntype>, %1.<Vtype>, %2" @@ -1751,7 +1751,7 @@ (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero") (truncate:<VNARROWQ> (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w") - (match_operand:VQN 2 "aarch64_simd_rshift_imm")))))] + (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))] "TARGET_SIMD && BYTES_BIG_ENDIAN" "shrn\\t%0.<Vntype>, %1.<Vtype>, %2" [(set_attr "type" "neon_shift_imm_narrow_q")] @@ -1786,8 +1786,8 @@ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") (vec_concat:<VNARROWQ2> (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w") - (match_operand:VQN 2 "aarch64_simd_rshift_imm") - ] UNSPEC_RSHRN) + (match_operand:VQN 2 + "aarch64_simd_shift_imm_vec_<vn_mode>")] UNSPEC_RSHRN) (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))] "TARGET_SIMD && !BYTES_BIG_ENDIAN" "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2" @@ -1799,8 +1799,8 @@ (vec_concat:<VNARROWQ2> (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero") (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w") - (match_operand:VQN 2 "aarch64_simd_rshift_imm") - ] UNSPEC_RSHRN)))] + (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")] + UNSPEC_RSHRN)))] "TARGET_SIMD && BYTES_BIG_ENDIAN" "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2" [(set_attr "type" "neon_shift_imm_narrow_q")] @@ -1836,7 +1836,7 @@ (match_operand:<VNARROWQ> 1 "register_operand" "0") (truncate:<VNARROWQ> (lshiftrt:VQN 
(match_operand:VQN 2 "register_operand" "w") - (match_operand:VQN 3 "aarch64_simd_rshift_imm")))))] + (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))] "TARGET_SIMD && !BYTES_BIG_ENDIAN" "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3" [(set_attr "type" "neon_shift_imm_narrow_q")] @@ -1847,7 +1847,8 @@ (vec_concat:<VNARROWQ2> (truncate:<VNARROWQ> (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w") - (match_operand:VQN 3 "aarch64_simd_rshift_imm"))) + (match_operand:VQN 3 + "aarch64_simd_shift_imm_vec_<vn_mode>"))) (match_operand:<VNARROWQ> 1 "register_operand" "0")))] "TARGET_SIMD && BYTES_BIG_ENDIAN" "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3" @@ -1878,7 +1879,8 @@ (vec_concat:<VNARROWQ2> (match_operand:<VNARROWQ> 1 "register_operand" "0") (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w") - (match_operand:VQN 3 "aarch64_simd_rshift_imm")] UNSPEC_RSHRN)))] + (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")] + UNSPEC_RSHRN)))] "TARGET_SIMD && !BYTES_BIG_ENDIAN" "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3" [(set_attr "type" "neon_shift_imm_narrow_q")] @@ -1888,7 +1890,8 @@ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") (vec_concat:<VNARROWQ2> (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w") - (match_operand:VQN 3 "aarch64_simd_rshift_imm")] UNSPEC_RSHRN) + (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")] + UNSPEC_RSHRN) (match_operand:<VNARROWQ> 1 "register_operand" "0")))] "TARGET_SIMD && BYTES_BIG_ENDIAN" "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3" diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 6270b51..25612d2 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -1467,6 +1467,9 @@ function_resolver::require_vector_type (unsigned int argno, { tree expected = acle_vector_types[0][type]; tree actual = get_argument_type (argno); + if (actual == error_mark_node) + return false; + if 
(!matches_type_p (expected, actual)) { error_at (location, "passing %qT to argument %d of %qE, which" diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 6083196..7db2938 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -4549,10 +4549,8 @@ } else { - amount = gen_reg_rtx (<MODE>mode); - emit_insn (gen_vec_duplicate<mode> (amount, - convert_to_mode (<VEL>mode, - operands[2], 0))); + amount = convert_to_mode (<VEL>mode, operands[2], 0); + amount = expand_vector_broadcast (<MODE>mode, amount); } emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount)); DONE; diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def index aae9952..588edf4 100644 --- a/gcc/config/aarch64/aarch64-tuning-flags.def +++ b/gcc/config/aarch64/aarch64-tuning-flags.def @@ -46,4 +46,6 @@ AARCH64_EXTRA_TUNING_OPTION ("no_ldp_stp_qregs", NO_LDP_STP_QREGS) AARCH64_EXTRA_TUNING_OPTION ("rename_load_regs", RENAME_LOAD_REGS) +AARCH64_EXTRA_TUNING_OPTION ("cse_sve_vl_constants", CSE_SVE_VL_CONSTANTS) + #undef AARCH64_EXTRA_TUNING_OPTION diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 146ed8c..8a86889 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1492,7 +1492,7 @@ static const struct tune_params neoversev1_tunings = 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + (AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS), /* tune_flags. */ &generic_prefetch_tune }; @@ -12589,8 +12589,18 @@ cost_plus: *cost += rtx_cost (op0, mode, PLUS, 0, speed); if (speed) - /* ADD (immediate). */ - *cost += extra_cost->alu.arith; + { + /* ADD (immediate). */ + *cost += extra_cost->alu.arith; + + /* Some tunings prefer to not use the VL-based scalar ops. 
+ Increase the cost of the poly immediate to prevent their + formation. */ + if (GET_CODE (op1) == CONST_POLY_INT + && (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS)) + *cost += COSTS_N_INSNS (1); + } return true; } @@ -13492,6 +13502,9 @@ aarch64_init_builtins () { aarch64_general_init_builtins (); aarch64_sve::init_builtins (); +#ifdef SUBTARGET_INIT_BUILTINS + SUBTARGET_INIT_BUILTINS; +#endif } /* Implement TARGET_FOLD_BUILTIN. */ @@ -17263,7 +17276,7 @@ aarch64_composite_type_p (const_tree type, parameter passing registers are available). Upon successful return, *COUNT returns the number of needed registers, - *BASE_MODE returns the mode of the individual register and when IS_HAF + *BASE_MODE returns the mode of the individual register and when IS_HA is not NULL, *IS_HA indicates whether or not the argument is a homogeneous floating-point aggregate or a homogeneous short-vector aggregate. diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index a482419..b2abb5b 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1933,6 +1933,14 @@ && (!REG_P (op1) || !REGNO_PTR_FRAME_P (REGNO (op1)))) operands[2] = force_reg (<MODE>mode, operands[2]); + /* Some tunings prefer to avoid VL-based operations. + Split off the poly immediate here. The rtx costs hook will reject attempts + to combine them back. */ + else if (GET_CODE (operands[2]) == CONST_POLY_INT + && can_create_pseudo_p () + && (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS)) + operands[2] = force_reg (<MODE>mode, operands[2]); /* Expand polynomial additions now if the destination is the stack pointer, since we don't want to use that as a temporary. 
*/ else if (operands[0] == stack_pointer_rtx @@ -4421,7 +4429,7 @@ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")) (match_operand:GPI 3 "register_operand" "r")))] "" - "<logical>\\t%<w>0, %<w>3, %<w>1, ror (<sizen> - %2)" + "<logical>\\t%<w>0, %<w>3, %<w>1, ror #(<sizen> - %2)" [(set_attr "type" "logic_shift_imm")] ) @@ -4446,7 +4454,7 @@ (match_operand:QI 2 "aarch64_shift_imm_si" "n")) (match_operand:SI 3 "register_operand" "r"))))] "" - "<logical>\\t%w0, %w3, %w1, ror (32 - %2)" + "<logical>\\t%w0, %w3, %w1, ror #(32 - %2)" [(set_attr "type" "logic_shift_imm")] ) diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 75612fd..c55842b 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -545,6 +545,22 @@ (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 1, 64)"))) +(define_predicate "aarch64_simd_shift_imm_vec_qi" + (and (match_code "const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 8)"))) + +(define_predicate "aarch64_simd_shift_imm_vec_hi" + (and (match_code "const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 16)"))) + +(define_predicate "aarch64_simd_shift_imm_vec_si" + (and (match_code "const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 32)"))) + +(define_predicate "aarch64_simd_shift_imm_vec_di" + (and (match_code "const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 64)"))) + (define_predicate "aarch64_simd_shift_imm_bitsize_qi" (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 0, 8)"))) diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index 367e4c9..3201c3f 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -10285,23 +10285,6 @@ arc_regno_use_in (unsigned int regno, rtx x) return NULL_RTX; } -/* Return the integer value of the "type" attribute for INSN, or -1 if - INSN can't have attributes. 
*/ - -static int -arc_attr_type (rtx_insn *insn) -{ - if (NONJUMP_INSN_P (insn) - ? (GET_CODE (PATTERN (insn)) == USE - || GET_CODE (PATTERN (insn)) == CLOBBER) - : JUMP_P (insn) - ? (GET_CODE (PATTERN (insn)) == ADDR_VEC - || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) - : !CALL_P (insn)) - return -1; - return get_attr_type (insn); -} - /* Code has a minimum p2 alignment of 1, which we must restore after an ADDR_DIFF_VEC. */ diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index 7a9cf3d..b6171e8 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -144,9 +144,9 @@ struct cpu_cost_table const struct vector_cost_table vect; }; -rtx_insn * -arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/, - vec<const char *> &constraints, - vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs); +rtx_insn *arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, + vec<const char *> &constraints, + vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs); #endif /* GCC_AARCH_COMMON_PROTOS_H */ diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c index 6ff4215..24711d5 100644 --- a/gcc/config/arm/aarch-common.c +++ b/gcc/config/arm/aarch-common.c @@ -531,9 +531,10 @@ arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) We implement asm flag outputs. 
*/ rtx_insn * -arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/, - vec<const char *> &constraints, - vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/) +arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, + vec<const char *> &constraints, vec<rtx> & /*clobbers*/, + HARD_REG_SET & /*clobbered_regs*/) { bool saw_asm_flag = false; diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index d254f41..49635bc 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -328,9 +328,10 @@ static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode); static bool arm_hard_regno_mode_ok (unsigned int, machine_mode); static bool arm_modes_tieable_p (machine_mode, machine_mode); static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT); -static rtx_insn * thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &, - vec<const char *> &, vec<rtx> &, - HARD_REG_SET &); +static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &, + vec<machine_mode> &, + vec<const char *> &, vec<rtx> &, + HARD_REG_SET &); /* Table of machine attributes. */ static const struct attribute_spec arm_attribute_table[] = @@ -33915,9 +33916,10 @@ arm_run_selftests (void) Unlike the arm version, we do NOT implement asm flag outputs. */ rtx_insn * -thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/, - vec<const char *> &constraints, - vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/) +thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, + vec<const char *> &constraints, vec<rtx> & /*clobbers*/, + HARD_REG_SET & /*clobbered_regs*/) { for (unsigned i = 0, n = outputs.length (); i < n; ++i) if (strncmp (constraints[i], "=@cc", 4) == 0) diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 6bc03ad..113c015 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -390,7 +390,10 @@ emission of floating point pcs attributes. 
*/ --with-float is ignored if -mfloat-abi is specified. --with-fpu is ignored if -mfpu is specified. --with-abi is ignored if -mabi is specified. - --with-tls is ignored if -mtls-dialect is specified. */ + --with-tls is ignored if -mtls-dialect is specified. + Note: --with-mode is not handled here, that has a special rule + TARGET_MODE_CHECK that also takes into account the selected CPU and + architecture. */ #define OPTION_DEFAULT_SPECS \ {"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" }, \ {"cpu", "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \ @@ -398,7 +401,6 @@ emission of floating point pcs attributes. */ {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" }, \ {"fpu", "%{!mfpu=*:-mfpu=%(VALUE)}"}, \ {"abi", "%{!mabi=*:-mabi=%(VALUE)}"}, \ - {"mode", "%{!marm:%{!mthumb:-m%(VALUE)}}"}, \ {"tls", "%{!mtls-dialect=*:-mtls-dialect=%(VALUE)}"}, extern const struct arm_fpu_desc @@ -2424,9 +2426,9 @@ extern const char *arm_asm_auto_mfpu (int argc, const char **argv); " mcpu=*:-mcpu=%:rewrite_mcpu(%{mcpu=*:%*})" \ " }" -extern const char *arm_target_thumb_only (int argc, const char **argv); +extern const char *arm_target_mode (int argc, const char **argv); #define TARGET_MODE_SPEC_FUNCTIONS \ - { "target_mode_check", arm_target_thumb_only }, + { "target_mode_check", arm_target_mode }, /* -mcpu=native handling only makes sense with compiler running on an ARM chip. 
*/ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 3e441f9..45a471a 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -9216,6 +9216,11 @@ else { rtx mem = force_const_mem (SImode, operands[1]); + if (!general_operand (mem, SImode)) + { + emit_move_insn (operands[2], XEXP (mem, 0)); + mem = replace_equiv_address (mem, operands[2], false); + } emit_move_insn (operands[2], mem); } } @@ -9299,6 +9304,11 @@ else { rtx mem = force_const_mem (SImode, operands[1]); + if (!general_operand (mem, SImode)) + { + emit_move_insn (operands[3], XEXP (mem, 0)); + mem = replace_equiv_address (mem, operands[3], false); + } emit_move_insn (operands[3], mem); } } diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index d7fd96c..5772f4d 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -536,19 +536,26 @@ [(set_attr "type" "call")] ) -(define_insn "*nonsecure_call_reg_thumb2" +(define_insn "*nonsecure_call_reg_thumb2_fpcxt" [(call (unspec:SI [(mem:SI (match_operand:SI 0 "s_register_operand" "l*r"))] UNSPEC_NONSECURE_MEM) (match_operand 1 "" "")) (use (match_operand 2 "" "")) (clobber (reg:SI LR_REGNUM))] - "TARGET_THUMB2 && use_cmse" - { - if (TARGET_HAVE_FPCXT_CMSE) - return "blxns\\t%0"; - else - return "bl\\t__gnu_cmse_nonsecure_call"; - } + "TARGET_THUMB2 && use_cmse && TARGET_HAVE_FPCXT_CMSE" + "blxns\\t%0" + [(set_attr "length" "4") + (set_attr "type" "call")] +) + +(define_insn "*nonsecure_call_reg_thumb2" + [(call (unspec:SI [(mem:SI (reg:SI R4_REGNUM))] + UNSPEC_NONSECURE_MEM) + (match_operand 0 "" "")) + (use (match_operand 1 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB2 && use_cmse && !TARGET_HAVE_FPCXT_CMSE" + "bl\\t__gnu_cmse_nonsecure_call" [(set_attr "length" "4") (set_attr "type" "call")] ) @@ -564,7 +571,7 @@ [(set_attr "type" "call")] ) -(define_insn "*nonsecure_call_value_reg_thumb2" +(define_insn "*nonsecure_call_value_reg_thumb2_fpcxt" [(set (match_operand 0 "" "") (call (unspec:SI 
[(mem:SI (match_operand:SI 1 "register_operand" "l*r"))] @@ -572,13 +579,21 @@ (match_operand 2 "" ""))) (use (match_operand 3 "" "")) (clobber (reg:SI LR_REGNUM))] - "TARGET_THUMB2 && use_cmse" - { - if (TARGET_HAVE_FPCXT_CMSE) - return "blxns\\t%1"; - else - return "bl\\t__gnu_cmse_nonsecure_call"; - } + "TARGET_THUMB2 && use_cmse && TARGET_HAVE_FPCXT_CMSE" + "blxns\\t%1" + [(set_attr "length" "4") + (set_attr "type" "call")] +) + +(define_insn "*nonsecure_call_value_reg_thumb2" + [(set (match_operand 0 "" "") + (call + (unspec:SI [(mem:SI (reg:SI R4_REGNUM))] UNSPEC_NONSECURE_MEM) + (match_operand 1 "" ""))) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB2 && use_cmse && !TARGET_HAVE_FPCXT_CMSE" + "bl\\t__gnu_cmse_nonsecure_call" [(set_attr "length" "4") (set_attr "type" "call")] ) diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c index 48ea855..d9213d7 100644 --- a/gcc/config/cris/cris.c +++ b/gcc/config/cris/cris.c @@ -150,7 +150,7 @@ static rtx cris_function_incoming_arg (cumulative_args_t, static void cris_function_arg_advance (cumulative_args_t, const function_arg_info &); static rtx_insn *cris_md_asm_adjust (vec<rtx> &, vec<rtx> &, - vec<const char *> &, + vec<machine_mode> &, vec<const char *> &, vec<rtx> &, HARD_REG_SET &); static void cris_option_override (void); @@ -864,7 +864,7 @@ cris_reg_saved_in_regsave_area (unsigned int regno) return (((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))) - && (regno != FRAME_POINTER_REGNUM || !frame_pointer_needed) + && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed) && regno != CRIS_SRP_REGNUM) || (crtl->calls_eh_return && (regno == EH_RETURN_DATA_REGNO (0) @@ -880,9 +880,6 @@ cris_print_operand (FILE *file, rtx x, int code) { rtx operand = x; - /* Size-strings corresponding to MULT expressions. */ - static const char *const mults[] = { "BAD:0", ".b", ".w", "BAD:3", ".d" }; - /* New code entries should just be added to the switch below. 
If handling is finished, just return. If handling was just a modification of the operand, the modified operand should be put in @@ -1212,11 +1209,21 @@ cris_print_operand (FILE *file, rtx x, int code) return; case 'T': - /* Print the size letter for an operand to a MULT, which must be a - const_int with a suitable value. */ - if (!CONST_INT_P (operand) || INTVAL (operand) > 4) - LOSE_AND_RETURN ("invalid operand for 'T' modifier", x); - fprintf (file, "%s", mults[INTVAL (operand)]); + { + /* Print the size letter for an operand to a ASHIFT, which must be a + const_int with a suitable value. */ + int shiftval; + + if (!CONST_INT_P (operand)) + LOSE_AND_RETURN ("invalid operand for 'T' modifier", x); + + shiftval = INTVAL (operand); + + if (!(shiftval == 1 || shiftval == 2)) + LOSE_AND_RETURN ("invalid operand for 'T' modifier", x); + + fprintf (file, "%s", shiftval == 1 ? ".w" : ".d"); + } return; case 0: @@ -1438,7 +1445,7 @@ cris_initial_elimination_offset (int fromreg, int toreg) int ap_fp_offset = 4 + (return_address_on_stack ? 
4 : 0); if (fromreg == ARG_POINTER_REGNUM - && toreg == FRAME_POINTER_REGNUM) + && toreg == HARD_FRAME_POINTER_REGNUM) return ap_fp_offset; /* Between the frame pointer and the stack are only "normal" stack @@ -1452,6 +1459,10 @@ cris_initial_elimination_offset (int fromreg, int toreg) && toreg == STACK_POINTER_REGNUM) return ap_fp_offset + fp_sp_offset - 4; + if (fromreg == FRAME_POINTER_REGNUM + && toreg == HARD_FRAME_POINTER_REGNUM) + return 0; + gcc_unreachable (); } @@ -2742,10 +2753,10 @@ cris_expand_prologue (void) mem = gen_rtx_MEM (SImode, stack_pointer_rtx); set_mem_alias_set (mem, get_frame_alias_set ()); - insn = emit_move_insn (mem, frame_pointer_rtx); + insn = emit_move_insn (mem, hard_frame_pointer_rtx); RTX_FRAME_RELATED_P (insn) = 1; - insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); RTX_FRAME_RELATED_P (insn) = 1; framesize += 4; @@ -2885,8 +2896,13 @@ cris_expand_prologue (void) framesize += size + cfoa_size; } + /* FIXME: -mmax-stackframe=SIZE is obsoleted; use -Wstack-usage=SIZE + instead. Make it an alias? */ if (cris_max_stackframe && framesize > cris_max_stackframe) warning (0, "stackframe too big: %d bytes", framesize); + + if (flag_stack_usage_info) + current_function_static_stack_size = framesize; } /* The expander for the epilogue pattern. */ @@ -3003,11 +3019,11 @@ cris_expand_epilogue (void) emit_insn (gen_cris_frame_deallocated_barrier ()); - emit_move_insn (stack_pointer_rtx, frame_pointer_rtx); + emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); mem = gen_rtx_MEM (SImode, gen_rtx_POST_INC (SImode, stack_pointer_rtx)); set_mem_alias_set (mem, get_frame_alias_set ()); - insn = emit_move_insn (frame_pointer_rtx, mem); + insn = emit_move_insn (hard_frame_pointer_rtx, mem); /* Whenever we emit insns with post-incremented addresses ourselves, we must add a post-inc note manually. 
*/ @@ -3489,8 +3505,9 @@ cris_function_arg_advance (cumulative_args_t ca_v, static rtx_insn * cris_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, - vec<const char *> &constraints, - vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) + vec<machine_mode> & /*input_modes*/, + vec<const char *> &constraints, vec<rtx> &clobbers, + HARD_REG_SET &clobbered_regs) { /* For the time being, all asms clobber condition codes. Revisit when there's a reasonable use for inputs/outputs diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h index d691da9..1ab830e 100644 --- a/gcc/config/cris/cris.h +++ b/gcc/config/cris/cris.h @@ -352,13 +352,6 @@ extern int cris_cpu_version; with other GNU/Linux ports (i.e. elfos.h users). */ #undef PCC_BITFIELD_TYPE_MATTERS -/* This is only used for non-scalars. Strange stuff happens to structs - (FIXME: What?) if we use anything larger than largest actually used - datum size, so lets make it 32. The type "long long" will still work - as usual. We can still have DImode insns, but they will only be used - for scalar data (i.e. long long). */ -#define MAX_FIXED_MODE_SIZE 32 - /* Node: Type Layout */ @@ -380,8 +373,8 @@ extern int cris_cpu_version; /* Node: Register Basics */ /* We count all 16 non-special registers, SRP, a faked argument - pointer register, MOF and CCR/DCCR. */ -#define FIRST_PSEUDO_REGISTER (16 + 1 + 1 + 1 + 1) + pointer register, MOF, CCR/DCCR, and the faked frame-pointer. */ +#define FIRST_PSEUDO_REGISTER (16 + 1 + 1 + 1 + 1 + 1) /* For CRIS, these are r15 (pc) and r14 (sp). Register r8 is used as a frame-pointer, but is not fixed. SRP is not included in general @@ -389,12 +382,12 @@ extern int cris_cpu_version; registers are fixed at the moment. The faked argument pointer register is fixed too. 
*/ #define FIXED_REGISTERS \ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1} + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1} /* Register r9 is used for structure-address, r10-r13 for parameters, r10- for return values. */ #define CALL_USED_REGISTERS \ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1} + {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1} /* Node: Allocation Order */ @@ -417,7 +410,8 @@ extern int cris_cpu_version; Use struct-return address first, since very few functions use structure return values so it is likely to be available. */ #define REG_ALLOC_ORDER \ - {9, 13, 12, 11, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 17, 16, 18, 19} + {9, 13, 12, 11, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 17, 16, 18, 19, \ + 20} /* Node: Leaf Functions */ @@ -458,6 +452,9 @@ enum reg_class #define CRIS_SPECIAL_REGS_CONTENTS \ ((1 << CRIS_SRP_REGNUM) | (1 << CRIS_MOF_REGNUM) | (1 << CRIS_CC0_REGNUM)) +#define CRIS_FAKED_REGS_CONTENTS \ + ((1 << CRIS_AP_REGNUM) | (1 << CRIS_FP_REGNUM)) + /* Count in the faked argument register in GENERAL_REGS. Keep out SRP. */ #define REG_CLASS_CONTENTS \ { \ @@ -471,13 +468,13 @@ enum reg_class {CRIS_SPECIAL_REGS_CONTENTS}, \ {CRIS_SPECIAL_REGS_CONTENTS \ | (1 << CRIS_ACR_REGNUM)}, \ - {(0xffff | (1 << CRIS_AP_REGNUM)) \ + {(0xffff | CRIS_FAKED_REGS_CONTENTS) \ & ~(1 << CRIS_ACR_REGNUM)}, \ - {(0xffff | (1 << CRIS_AP_REGNUM) \ + {(0xffff | CRIS_FAKED_REGS_CONTENTS \ | CRIS_SPECIAL_REGS_CONTENTS) \ & ~(1 << CRIS_ACR_REGNUM)}, \ - {0xffff | (1 << CRIS_AP_REGNUM)}, \ - {0xffff | (1 << CRIS_AP_REGNUM) \ + {0xffff | CRIS_FAKED_REGS_CONTENTS}, \ + {0xffff | CRIS_FAKED_REGS_CONTENTS \ | CRIS_SPECIAL_REGS_CONTENTS} \ } @@ -500,8 +497,10 @@ enum reg_class allocation. 
*/ #define REGNO_OK_FOR_BASE_P(REGNO) \ ((REGNO) <= CRIS_LAST_GENERAL_REGISTER \ + || (REGNO) == FRAME_POINTER_REGNUM \ || (REGNO) == ARG_POINTER_REGNUM \ || (unsigned) reg_renumber[REGNO] <= CRIS_LAST_GENERAL_REGISTER \ + || (unsigned) reg_renumber[REGNO] == FRAME_POINTER_REGNUM \ || (unsigned) reg_renumber[REGNO] == ARG_POINTER_REGNUM) /* See REGNO_OK_FOR_BASE_P. */ @@ -587,6 +586,9 @@ enum reg_class /* Register used for frame pointer. This is also the last of the saved registers, when a frame pointer is not used. */ +#define HARD_FRAME_POINTER_REGNUM CRIS_REAL_FP_REGNUM + +/* Faked register, is always eliminated to at least CRIS_REAL_FP_REGNUM. */ #define FRAME_POINTER_REGNUM CRIS_FP_REGNUM /* Faked register, is always eliminated. We need it to eliminate @@ -595,13 +597,17 @@ enum reg_class #define STATIC_CHAIN_REGNUM CRIS_STATIC_CHAIN_REGNUM +/* No unwind context is needed for faked registers nor DCCR. Currently not MOF + too, but let's keep that open. */ +#define DWARF_FRAME_REGISTERS (CRIS_MOF_REGNUM + 1) /* Node: Elimination */ #define ELIMINABLE_REGS \ {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ - {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ - {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}} + {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}} #define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ (OFFSET) = cris_initial_elimination_offset (FROM, TO) @@ -822,7 +828,8 @@ struct cum_args {int regs;}; #define REGISTER_NAMES \ {"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", \ - "r9", "r10", "r11", "r12", "r13", "sp", "pc", "srp", "mof", "faked_ap", "dccr"} + "r9", "r10", "r11", "r12", "r13", "sp", "pc", "srp", \ + "mof", "faked_ap", "dccr", "faked_fp"} #define ADDITIONAL_REGISTER_NAMES \ {{"r14", 14}, {"r15", 15}} diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md index 0fd29f9..7de0ec6 100644 --- a/gcc/config/cris/cris.md +++ 
b/gcc/config/cris/cris.md @@ -58,13 +58,14 @@ ;; Register numbers. (define_constants [(CRIS_STATIC_CHAIN_REGNUM 7) - (CRIS_FP_REGNUM 8) + (CRIS_REAL_FP_REGNUM 8) (CRIS_SP_REGNUM 14) (CRIS_ACR_REGNUM 15) (CRIS_SRP_REGNUM 16) (CRIS_MOF_REGNUM 17) (CRIS_AP_REGNUM 18) - (CRIS_CC0_REGNUM 19)] + (CRIS_CC0_REGNUM 19) + (CRIS_FP_REGNUM 20)] ) ;; We need an attribute to define whether an instruction can be put in @@ -1278,18 +1279,43 @@ (define_insn "*addi" [(set (match_operand:SI 0 "register_operand" "=r") (plus:SI - (mult:SI (match_operand:SI 2 "register_operand" "r") - (match_operand:SI 3 "const_int_operand" "n")) + (ashift:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n")) (match_operand:SI 1 "register_operand" "0")))] "operands[0] != frame_pointer_rtx && operands[1] != frame_pointer_rtx && CONST_INT_P (operands[3]) - && (INTVAL (operands[3]) == 1 - || INTVAL (operands[3]) == 2 || INTVAL (operands[3]) == 4)" + && (INTVAL (operands[3]) == 1 || INTVAL (operands[3]) == 2)" "addi %2%T3,%0" [(set_attr "slottable" "yes") (set_attr "cc" "none")]) +;; The mult-vs-ashift canonicalization-cleanup plagues us: nothing in +;; reload transforms a "scaled multiplication" into an ashift in a +;; reloaded address; it's passed as-is and expected to be recognized, +;; or else we get a tell-tale "unrecognizable insn". +;; On top of that, we *should* match the bare insn, as a *matching +;; pattern* (as opposed to e.g. a reload_load_address expander +;; changing the mul into an ashift), so can_reload_into will re-use +;; registers in the reloaded expression instead of allocating a new +;; register. 
+(define_insn_and_split "*addi_reload" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n")) + (match_operand:SI 1 "register_operand" "0")))] + "operands[0] != frame_pointer_rtx + && operands[1] != frame_pointer_rtx + && CONST_INT_P (operands[3]) + && (INTVAL (operands[3]) == 2 || INTVAL (operands[3]) == 4) + && (reload_in_progress || reload_completed)" + "#" + "" + [(set (match_dup 0) + (plus:SI (ashift:SI (match_dup 2) (match_dup 3)) (match_dup 1)))] + "operands[3] = operands[3] == const2_rtx ? const1_rtx : const2_rtx;") + ;; This pattern is usually generated after reload, so a '%' is ;; ineffective; use explicit combinations. (define_insn "*addi_b_<mode>" diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h index 2120eb6..f5ef824 100644 --- a/gcc/config/darwin-protos.h +++ b/gcc/config/darwin-protos.h @@ -70,6 +70,7 @@ extern void darwin_non_lazy_pcrel (FILE *, rtx); extern void darwin_emit_unwind_label (FILE *, tree, int, int); extern void darwin_emit_except_table_label (FILE *); extern rtx darwin_make_eh_symbol_indirect (rtx, bool); +extern bool darwin_should_restore_cfa_state (void); extern void darwin_pragma_ignore (struct cpp_reader *); extern void darwin_pragma_options (struct cpp_reader *); diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c index 119f319..e2e60bb 100644 --- a/gcc/config/darwin.c +++ b/gcc/config/darwin.c @@ -2236,6 +2236,16 @@ darwin_make_eh_symbol_indirect (rtx orig, bool ARG_UNUSED (pubvis)) /*stub_p=*/false)); } +/* The unwinders in earlier Darwin versions are based on an old version + of libgcc_s and need current frame address state to be reset after a + DW_CFA_restore_state recovers the register values. */ + +bool +darwin_should_restore_cfa_state (void) +{ + return generating_for_darwin_version <= 10; +} + /* Return, and mark as used, the name of the stub for the mcount function. 
Currently, this is only called by X86 code in the expansion of the FUNCTION_PROFILER macro, when stubs are enabled. */ diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index 5a9fb43f..d2b2c14 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -614,6 +614,11 @@ extern GTY(()) int darwin_ms_struct; /* Make an EH (personality or LDSA) symbol indirect as needed. */ #define TARGET_ASM_MAKE_EH_SYMBOL_INDIRECT darwin_make_eh_symbol_indirect +/* Some of Darwin's unwinders need current frame address state to be reset + after a DW_CFA_restore_state recovers the register values. */ +#undef TARGET_ASM_SHOULD_RESTORE_CFA_STATE +#define TARGET_ASM_SHOULD_RESTORE_CFA_STATE darwin_should_restore_cfa_state + /* Our profiling scheme doesn't LP labels and counter words. */ #define NO_PROFILE_COUNTERS 1 diff --git a/gcc/config/host-darwin.c b/gcc/config/host-darwin.c index 1816c61..b101fca 100644 --- a/gcc/config/host-darwin.c +++ b/gcc/config/host-darwin.c @@ -61,7 +61,8 @@ darwin_gt_pch_use_address (void *addr, size_t sz, int fd, size_t off) sz = (sz + pagesize - 1) / pagesize * pagesize; if (munmap (pch_address_space + sz, sizeof (pch_address_space) - sz) != 0) - fatal_error (input_location, "couldn%'t unmap pch_address_space: %m"); + fatal_error (input_location, + "could not unmap %<pch_address_space%> %m"); if (ret) { diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 0ccefa8..a8db33e 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -110,7 +110,7 @@ ;; v any EVEX encodable SSE register for AVX512VL target, ;; otherwise any SSE register ;; w any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL -;; target. +;; target, otherwise any SSE register. (define_register_constraint "Yz" "TARGET_SSE ? 
SSE_FIRST_REG : NO_REGS" "First SSE register (@code{%xmm0}).") @@ -148,8 +148,8 @@ "@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.") (define_register_constraint "Yw" - "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : NO_REGS" - "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target.") + "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS" + "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target, otherwise any SSE register.") ;; We use the B prefix to denote any number of internal operands: ;; f FLAGS_REG diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index cdeabbf..e93935f 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -2159,11 +2159,11 @@ ix86_option_override_internal (bool main_args_p, && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_MOVBE)) opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_MOVBE; if (((processor_alias_table[i].flags & PTA_AES) != 0) - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) - ix86_isa_flags |= OPTION_MASK_ISA_AES; + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES; if (((processor_alias_table[i].flags & PTA_SHA) != 0) - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA)) - ix86_isa_flags |= OPTION_MASK_ISA_SHA; + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA; if (((processor_alias_table[i].flags & PTA_PCLMUL) != 0) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL; @@ -2354,6 +2354,13 @@ ix86_option_override_internal (bool main_args_p, if (((processor_alias_table[i].flags & PTA_PKU) != 0) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU)) opts->x_ix86_isa_flags |= 
OPTION_MASK_ISA_PKU; + if (((processor_alias_table[i].flags & PTA_UINTR) != 0) + && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_UINTR)) + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_UINTR; + if (((processor_alias_table[i].flags & PTA_HRESET) != 0) + && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_HRESET)) + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_HRESET; + /* Don't enable x87 instructions if only general registers are allowed by target("general-regs-only") function attribute or diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 48f9aa0..2603333 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -6490,11 +6490,6 @@ ix86_compute_frame_layout (void) offset += frame->nregs * UNITS_PER_WORD; frame->reg_save_offset = offset; - /* On SEH target, registers are pushed just before the frame pointer - location. */ - if (TARGET_SEH) - frame->hard_frame_pointer_offset = offset; - /* Calculate the size of the va-arg area (not including padding, if any). */ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; @@ -6660,14 +6655,22 @@ ix86_compute_frame_layout (void) the unwind data structure. */ if (TARGET_SEH) { - HOST_WIDE_INT diff; + /* Force the frame pointer to point at or below the lowest register save + area, see the SEH code in config/i386/winnt.c for the rationale. */ + frame->hard_frame_pointer_offset = frame->sse_reg_save_offset; - /* If we can leave the frame pointer where it is, do so. Also, returns - the establisher frame for __builtin_frame_address (0). */ - diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset; - if (diff <= SEH_MAX_FRAME_SIZE - && (diff > 240 || (diff & 15) != 0) - && !crtl->accesses_prior_frames) + /* If we can leave the frame pointer where it is, do so. Also, return + the establisher frame for __builtin_frame_address (0) or else if the + frame overflows the SEH maximum frame size. 
*/ + const HOST_WIDE_INT diff + = frame->stack_pointer_offset - frame->hard_frame_pointer_offset; + if (diff <= 255) + { + /* The resulting diff will be a multiple of 16 lower than 255, + i.e. at most 240 as required by the unwind data structure. */ + frame->hard_frame_pointer_offset += (diff & 15); + } + else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames) { /* Ideally we'd determine what portion of the local stack frame (within the constraint of the lowest 240) is most heavily used. @@ -6676,6 +6679,8 @@ ix86_compute_frame_layout (void) frame that is addressable with 8-bit offsets. */ frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128; } + else + frame->hard_frame_pointer_offset = frame->hfp_save_offset; } } @@ -8336,17 +8341,6 @@ ix86_expand_prologue (void) insn = emit_insn (gen_push (hard_frame_pointer_rtx)); RTX_FRAME_RELATED_P (insn) = 1; - /* Push registers now, before setting the frame pointer - on SEH target. */ - if (!int_registers_saved - && TARGET_SEH - && !frame.save_regs_using_mov) - { - ix86_emit_save_regs (); - int_registers_saved = true; - gcc_assert (m->fs.sp_offset == frame.reg_save_offset); - } - if (m->fs.sp_offset == frame.hard_frame_pointer_offset) { insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); @@ -14754,11 +14748,6 @@ distance_non_agu_define (unsigned int regno1, unsigned int regno2, } } - /* get_attr_type may modify recog data. We want to make sure - that recog data is valid for instruction INSN, on which - distance_non_agu_define is called. INSN is unchanged here. */ - extract_insn_cached (insn); - if (!found) return -1; @@ -14928,17 +14917,15 @@ ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1, return true; } - rtx_insn *rinsn = recog_data.insn; + /* Remember recog_data content. 
*/ + struct recog_data_d recog_data_save = recog_data; dist_define = distance_non_agu_define (regno1, regno2, insn); dist_use = distance_agu_use (regno0, insn); - /* distance_non_agu_define can call extract_insn_cached. If this function - is called from define_split conditions, that can break insn splitting, - because split_insns works by clearing recog_data.insn and then modifying - recog_data.operand array and match the various split conditions. */ - if (recog_data.insn != rinsn) - recog_data.insn = NULL; + /* distance_non_agu_define can call get_attr_type which can call + recog_memoized, restore recog_data back to previous content. */ + recog_data = recog_data_save; if (dist_define < 0 || dist_define >= LEA_MAX_STALL) { @@ -14968,38 +14955,6 @@ ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1, return dist_define >= dist_use; } -/* Return true if it is legal to clobber flags by INSN and - false otherwise. */ - -static bool -ix86_ok_to_clobber_flags (rtx_insn *insn) -{ - basic_block bb = BLOCK_FOR_INSN (insn); - df_ref use; - bitmap live; - - while (insn) - { - if (NONDEBUG_INSN_P (insn)) - { - FOR_EACH_INSN_USE (use, insn) - if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG) - return false; - - if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn)) - return true; - } - - if (insn == BB_END (bb)) - break; - - insn = NEXT_INSN (insn); - } - - live = df_get_live_out(bb); - return !REGNO_REG_SET_P (live, FLAGS_REG); -} - /* Return true if we need to split op0 = op1 + op2 into a sequence of move and add to avoid AGU stalls. */ @@ -15012,10 +14967,6 @@ ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[]) if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) return false; - /* Check it is correct to split here. 
*/ - if (!ix86_ok_to_clobber_flags(insn)) - return false; - regno0 = true_regnum (operands[0]); regno1 = true_regnum (operands[1]); regno2 = true_regnum (operands[2]); @@ -15051,7 +15002,7 @@ ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[]) } /* Return true if we need to split lea into a sequence of - instructions to avoid AGU stalls. */ + instructions to avoid AGU stalls during peephole2. */ bool ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[]) @@ -15071,10 +15022,6 @@ ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[]) && REG_P (XEXP (operands[1], 0)))) return false; - /* Check if it is OK to split here. */ - if (!ix86_ok_to_clobber_flags (insn)) - return false; - ok = ix86_decompose_address (operands[1], &parts); gcc_assert (ok); @@ -21426,9 +21373,10 @@ ix86_c_mode_for_suffix (char suffix) with the old cc0-based compiler. */ static rtx_insn * -ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/, - vec<const char *> &constraints, - vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) +ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, + vec<const char *> &constraints, vec<rtx> &clobbers, + HARD_REG_SET &clobbered_regs) { bool saw_asm_flag = false; @@ -22657,15 +22605,15 @@ ix86_simd_clone_usable (struct cgraph_node *node) return -1; if (!TARGET_AVX) return 0; - return TARGET_AVX2 ? 2 : 1; + return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1; case 'c': if (!TARGET_AVX) return -1; - return TARGET_AVX2 ? 1 : 0; + return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0; case 'd': if (!TARGET_AVX2) return -1; - return 0; + return TARGET_AVX512F ? 
1 : 0; case 'e': if (!TARGET_AVX512F) return -1; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 272b195..69fddca 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1163,6 +1163,22 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); /* k0, k1, k2, k3, k4, k5, k6, k7*/ \ 1, 1, 1, 1, 1, 1, 1, 1 } +/* Order in which to allocate registers. Each register must be + listed once, even those in FIXED_REGISTERS. List frame pointer + late and fixed registers last. Note that, in general, we prefer + registers listed in CALL_USED_REGISTERS, keeping the others + available for storage of persistent values. + + The ADJUST_REG_ALLOC_ORDER actually overwrite the order, + so this is just empty initializer for array. */ + +#define REG_ALLOC_ORDER \ +{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \ + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, \ + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \ + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, \ + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75 } + /* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order to be rearranged based on a particular function. When using sse math, we want to allocate SSE before x87 registers and vice versa. 
*/ @@ -2391,157 +2407,160 @@ extern const char *const processor_names[]; #include "wide-int-bitmask.h" -const wide_int_bitmask PTA_3DNOW (HOST_WIDE_INT_1U << 0); -const wide_int_bitmask PTA_3DNOW_A (HOST_WIDE_INT_1U << 1); -const wide_int_bitmask PTA_64BIT (HOST_WIDE_INT_1U << 2); -const wide_int_bitmask PTA_ABM (HOST_WIDE_INT_1U << 3); -const wide_int_bitmask PTA_AES (HOST_WIDE_INT_1U << 4); -const wide_int_bitmask PTA_AVX (HOST_WIDE_INT_1U << 5); -const wide_int_bitmask PTA_BMI (HOST_WIDE_INT_1U << 6); -const wide_int_bitmask PTA_CX16 (HOST_WIDE_INT_1U << 7); -const wide_int_bitmask PTA_F16C (HOST_WIDE_INT_1U << 8); -const wide_int_bitmask PTA_FMA (HOST_WIDE_INT_1U << 9); -const wide_int_bitmask PTA_FMA4 (HOST_WIDE_INT_1U << 10); -const wide_int_bitmask PTA_FSGSBASE (HOST_WIDE_INT_1U << 11); -const wide_int_bitmask PTA_LWP (HOST_WIDE_INT_1U << 12); -const wide_int_bitmask PTA_LZCNT (HOST_WIDE_INT_1U << 13); -const wide_int_bitmask PTA_MMX (HOST_WIDE_INT_1U << 14); -const wide_int_bitmask PTA_MOVBE (HOST_WIDE_INT_1U << 15); -const wide_int_bitmask PTA_NO_SAHF (HOST_WIDE_INT_1U << 16); -const wide_int_bitmask PTA_PCLMUL (HOST_WIDE_INT_1U << 17); -const wide_int_bitmask PTA_POPCNT (HOST_WIDE_INT_1U << 18); -const wide_int_bitmask PTA_PREFETCH_SSE (HOST_WIDE_INT_1U << 19); -const wide_int_bitmask PTA_RDRND (HOST_WIDE_INT_1U << 20); -const wide_int_bitmask PTA_SSE (HOST_WIDE_INT_1U << 21); -const wide_int_bitmask PTA_SSE2 (HOST_WIDE_INT_1U << 22); -const wide_int_bitmask PTA_SSE3 (HOST_WIDE_INT_1U << 23); -const wide_int_bitmask PTA_SSE4_1 (HOST_WIDE_INT_1U << 24); -const wide_int_bitmask PTA_SSE4_2 (HOST_WIDE_INT_1U << 25); -const wide_int_bitmask PTA_SSE4A (HOST_WIDE_INT_1U << 26); -const wide_int_bitmask PTA_SSSE3 (HOST_WIDE_INT_1U << 27); -const wide_int_bitmask PTA_TBM (HOST_WIDE_INT_1U << 28); -const wide_int_bitmask PTA_XOP (HOST_WIDE_INT_1U << 29); -const wide_int_bitmask PTA_AVX2 (HOST_WIDE_INT_1U << 30); -const wide_int_bitmask PTA_BMI2 (HOST_WIDE_INT_1U << 
31); -const wide_int_bitmask PTA_RTM (HOST_WIDE_INT_1U << 32); -const wide_int_bitmask PTA_HLE (HOST_WIDE_INT_1U << 33); -const wide_int_bitmask PTA_PRFCHW (HOST_WIDE_INT_1U << 34); -const wide_int_bitmask PTA_RDSEED (HOST_WIDE_INT_1U << 35); -const wide_int_bitmask PTA_ADX (HOST_WIDE_INT_1U << 36); -const wide_int_bitmask PTA_FXSR (HOST_WIDE_INT_1U << 37); -const wide_int_bitmask PTA_XSAVE (HOST_WIDE_INT_1U << 38); -const wide_int_bitmask PTA_XSAVEOPT (HOST_WIDE_INT_1U << 39); -const wide_int_bitmask PTA_AVX512F (HOST_WIDE_INT_1U << 40); -const wide_int_bitmask PTA_AVX512ER (HOST_WIDE_INT_1U << 41); -const wide_int_bitmask PTA_AVX512PF (HOST_WIDE_INT_1U << 42); -const wide_int_bitmask PTA_AVX512CD (HOST_WIDE_INT_1U << 43); -const wide_int_bitmask PTA_NO_TUNE (HOST_WIDE_INT_1U << 44); -const wide_int_bitmask PTA_SHA (HOST_WIDE_INT_1U << 45); -const wide_int_bitmask PTA_PREFETCHWT1 (HOST_WIDE_INT_1U << 46); -const wide_int_bitmask PTA_CLFLUSHOPT (HOST_WIDE_INT_1U << 47); -const wide_int_bitmask PTA_XSAVEC (HOST_WIDE_INT_1U << 48); -const wide_int_bitmask PTA_XSAVES (HOST_WIDE_INT_1U << 49); -const wide_int_bitmask PTA_AVX512DQ (HOST_WIDE_INT_1U << 50); -const wide_int_bitmask PTA_AVX512BW (HOST_WIDE_INT_1U << 51); -const wide_int_bitmask PTA_AVX512VL (HOST_WIDE_INT_1U << 52); -const wide_int_bitmask PTA_AVX512IFMA (HOST_WIDE_INT_1U << 53); -const wide_int_bitmask PTA_AVX512VBMI (HOST_WIDE_INT_1U << 54); -const wide_int_bitmask PTA_CLWB (HOST_WIDE_INT_1U << 55); -const wide_int_bitmask PTA_MWAITX (HOST_WIDE_INT_1U << 56); -const wide_int_bitmask PTA_CLZERO (HOST_WIDE_INT_1U << 57); -const wide_int_bitmask PTA_NO_80387 (HOST_WIDE_INT_1U << 58); -const wide_int_bitmask PTA_PKU (HOST_WIDE_INT_1U << 59); -const wide_int_bitmask PTA_AVX5124VNNIW (HOST_WIDE_INT_1U << 60); -const wide_int_bitmask PTA_AVX5124FMAPS (HOST_WIDE_INT_1U << 61); -const wide_int_bitmask PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1U << 62); -const wide_int_bitmask PTA_SGX (HOST_WIDE_INT_1U << 63); -const 
wide_int_bitmask PTA_AVX512VNNI (0, HOST_WIDE_INT_1U); -const wide_int_bitmask PTA_GFNI (0, HOST_WIDE_INT_1U << 1); -const wide_int_bitmask PTA_VAES (0, HOST_WIDE_INT_1U << 2); -const wide_int_bitmask PTA_AVX512VBMI2 (0, HOST_WIDE_INT_1U << 3); -const wide_int_bitmask PTA_VPCLMULQDQ (0, HOST_WIDE_INT_1U << 4); -const wide_int_bitmask PTA_AVX512BITALG (0, HOST_WIDE_INT_1U << 5); -const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6); -const wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7); -const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8); -const wide_int_bitmask PTA_AVX512VP2INTERSECT (0, HOST_WIDE_INT_1U << 9); -const wide_int_bitmask PTA_PTWRITE (0, HOST_WIDE_INT_1U << 10); -const wide_int_bitmask PTA_AVX512BF16 (0, HOST_WIDE_INT_1U << 11); -const wide_int_bitmask PTA_WAITPKG (0, HOST_WIDE_INT_1U << 12); -const wide_int_bitmask PTA_MOVDIRI (0, HOST_WIDE_INT_1U << 13); -const wide_int_bitmask PTA_MOVDIR64B (0, HOST_WIDE_INT_1U << 14); -const wide_int_bitmask PTA_ENQCMD (0, HOST_WIDE_INT_1U << 15); -const wide_int_bitmask PTA_CLDEMOTE (0, HOST_WIDE_INT_1U << 16); -const wide_int_bitmask PTA_SERIALIZE (0, HOST_WIDE_INT_1U << 17); -const wide_int_bitmask PTA_TSXLDTRK (0, HOST_WIDE_INT_1U << 18); -const wide_int_bitmask PTA_AMX_TILE (0, HOST_WIDE_INT_1U << 19); -const wide_int_bitmask PTA_AMX_INT8 (0, HOST_WIDE_INT_1U << 20); -const wide_int_bitmask PTA_AMX_BF16 (0, HOST_WIDE_INT_1U << 21); -const wide_int_bitmask PTA_UINTR (0, HOST_WIDE_INT_1U << 22); -const wide_int_bitmask PTA_HRESET (0, HOST_WIDE_INT_1U << 23); -const wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24); -const wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25); -const wide_int_bitmask PTA_AVXVNNI (0, HOST_WIDE_INT_1U << 26); - -const wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE +constexpr wide_int_bitmask PTA_3DNOW (HOST_WIDE_INT_1U << 0); +constexpr wide_int_bitmask PTA_3DNOW_A (HOST_WIDE_INT_1U << 1); +constexpr wide_int_bitmask PTA_64BIT 
(HOST_WIDE_INT_1U << 2); +constexpr wide_int_bitmask PTA_ABM (HOST_WIDE_INT_1U << 3); +constexpr wide_int_bitmask PTA_AES (HOST_WIDE_INT_1U << 4); +constexpr wide_int_bitmask PTA_AVX (HOST_WIDE_INT_1U << 5); +constexpr wide_int_bitmask PTA_BMI (HOST_WIDE_INT_1U << 6); +constexpr wide_int_bitmask PTA_CX16 (HOST_WIDE_INT_1U << 7); +constexpr wide_int_bitmask PTA_F16C (HOST_WIDE_INT_1U << 8); +constexpr wide_int_bitmask PTA_FMA (HOST_WIDE_INT_1U << 9); +constexpr wide_int_bitmask PTA_FMA4 (HOST_WIDE_INT_1U << 10); +constexpr wide_int_bitmask PTA_FSGSBASE (HOST_WIDE_INT_1U << 11); +constexpr wide_int_bitmask PTA_LWP (HOST_WIDE_INT_1U << 12); +constexpr wide_int_bitmask PTA_LZCNT (HOST_WIDE_INT_1U << 13); +constexpr wide_int_bitmask PTA_MMX (HOST_WIDE_INT_1U << 14); +constexpr wide_int_bitmask PTA_MOVBE (HOST_WIDE_INT_1U << 15); +constexpr wide_int_bitmask PTA_NO_SAHF (HOST_WIDE_INT_1U << 16); +constexpr wide_int_bitmask PTA_PCLMUL (HOST_WIDE_INT_1U << 17); +constexpr wide_int_bitmask PTA_POPCNT (HOST_WIDE_INT_1U << 18); +constexpr wide_int_bitmask PTA_PREFETCH_SSE (HOST_WIDE_INT_1U << 19); +constexpr wide_int_bitmask PTA_RDRND (HOST_WIDE_INT_1U << 20); +constexpr wide_int_bitmask PTA_SSE (HOST_WIDE_INT_1U << 21); +constexpr wide_int_bitmask PTA_SSE2 (HOST_WIDE_INT_1U << 22); +constexpr wide_int_bitmask PTA_SSE3 (HOST_WIDE_INT_1U << 23); +constexpr wide_int_bitmask PTA_SSE4_1 (HOST_WIDE_INT_1U << 24); +constexpr wide_int_bitmask PTA_SSE4_2 (HOST_WIDE_INT_1U << 25); +constexpr wide_int_bitmask PTA_SSE4A (HOST_WIDE_INT_1U << 26); +constexpr wide_int_bitmask PTA_SSSE3 (HOST_WIDE_INT_1U << 27); +constexpr wide_int_bitmask PTA_TBM (HOST_WIDE_INT_1U << 28); +constexpr wide_int_bitmask PTA_XOP (HOST_WIDE_INT_1U << 29); +constexpr wide_int_bitmask PTA_AVX2 (HOST_WIDE_INT_1U << 30); +constexpr wide_int_bitmask PTA_BMI2 (HOST_WIDE_INT_1U << 31); +constexpr wide_int_bitmask PTA_RTM (HOST_WIDE_INT_1U << 32); +constexpr wide_int_bitmask PTA_HLE (HOST_WIDE_INT_1U << 33); +constexpr 
wide_int_bitmask PTA_PRFCHW (HOST_WIDE_INT_1U << 34); +constexpr wide_int_bitmask PTA_RDSEED (HOST_WIDE_INT_1U << 35); +constexpr wide_int_bitmask PTA_ADX (HOST_WIDE_INT_1U << 36); +constexpr wide_int_bitmask PTA_FXSR (HOST_WIDE_INT_1U << 37); +constexpr wide_int_bitmask PTA_XSAVE (HOST_WIDE_INT_1U << 38); +constexpr wide_int_bitmask PTA_XSAVEOPT (HOST_WIDE_INT_1U << 39); +constexpr wide_int_bitmask PTA_AVX512F (HOST_WIDE_INT_1U << 40); +constexpr wide_int_bitmask PTA_AVX512ER (HOST_WIDE_INT_1U << 41); +constexpr wide_int_bitmask PTA_AVX512PF (HOST_WIDE_INT_1U << 42); +constexpr wide_int_bitmask PTA_AVX512CD (HOST_WIDE_INT_1U << 43); +constexpr wide_int_bitmask PTA_NO_TUNE (HOST_WIDE_INT_1U << 44); +constexpr wide_int_bitmask PTA_SHA (HOST_WIDE_INT_1U << 45); +constexpr wide_int_bitmask PTA_PREFETCHWT1 (HOST_WIDE_INT_1U << 46); +constexpr wide_int_bitmask PTA_CLFLUSHOPT (HOST_WIDE_INT_1U << 47); +constexpr wide_int_bitmask PTA_XSAVEC (HOST_WIDE_INT_1U << 48); +constexpr wide_int_bitmask PTA_XSAVES (HOST_WIDE_INT_1U << 49); +constexpr wide_int_bitmask PTA_AVX512DQ (HOST_WIDE_INT_1U << 50); +constexpr wide_int_bitmask PTA_AVX512BW (HOST_WIDE_INT_1U << 51); +constexpr wide_int_bitmask PTA_AVX512VL (HOST_WIDE_INT_1U << 52); +constexpr wide_int_bitmask PTA_AVX512IFMA (HOST_WIDE_INT_1U << 53); +constexpr wide_int_bitmask PTA_AVX512VBMI (HOST_WIDE_INT_1U << 54); +constexpr wide_int_bitmask PTA_CLWB (HOST_WIDE_INT_1U << 55); +constexpr wide_int_bitmask PTA_MWAITX (HOST_WIDE_INT_1U << 56); +constexpr wide_int_bitmask PTA_CLZERO (HOST_WIDE_INT_1U << 57); +constexpr wide_int_bitmask PTA_NO_80387 (HOST_WIDE_INT_1U << 58); +constexpr wide_int_bitmask PTA_PKU (HOST_WIDE_INT_1U << 59); +constexpr wide_int_bitmask PTA_AVX5124VNNIW (HOST_WIDE_INT_1U << 60); +constexpr wide_int_bitmask PTA_AVX5124FMAPS (HOST_WIDE_INT_1U << 61); +constexpr wide_int_bitmask PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1U << 62); +constexpr wide_int_bitmask PTA_SGX (HOST_WIDE_INT_1U << 63); +constexpr 
wide_int_bitmask PTA_AVX512VNNI (0, HOST_WIDE_INT_1U); +constexpr wide_int_bitmask PTA_GFNI (0, HOST_WIDE_INT_1U << 1); +constexpr wide_int_bitmask PTA_VAES (0, HOST_WIDE_INT_1U << 2); +constexpr wide_int_bitmask PTA_AVX512VBMI2 (0, HOST_WIDE_INT_1U << 3); +constexpr wide_int_bitmask PTA_VPCLMULQDQ (0, HOST_WIDE_INT_1U << 4); +constexpr wide_int_bitmask PTA_AVX512BITALG (0, HOST_WIDE_INT_1U << 5); +constexpr wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6); +constexpr wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7); +constexpr wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8); +constexpr wide_int_bitmask PTA_AVX512VP2INTERSECT (0, HOST_WIDE_INT_1U << 9); +constexpr wide_int_bitmask PTA_PTWRITE (0, HOST_WIDE_INT_1U << 10); +constexpr wide_int_bitmask PTA_AVX512BF16 (0, HOST_WIDE_INT_1U << 11); +constexpr wide_int_bitmask PTA_WAITPKG (0, HOST_WIDE_INT_1U << 12); +constexpr wide_int_bitmask PTA_MOVDIRI (0, HOST_WIDE_INT_1U << 13); +constexpr wide_int_bitmask PTA_MOVDIR64B (0, HOST_WIDE_INT_1U << 14); +constexpr wide_int_bitmask PTA_ENQCMD (0, HOST_WIDE_INT_1U << 15); +constexpr wide_int_bitmask PTA_CLDEMOTE (0, HOST_WIDE_INT_1U << 16); +constexpr wide_int_bitmask PTA_SERIALIZE (0, HOST_WIDE_INT_1U << 17); +constexpr wide_int_bitmask PTA_TSXLDTRK (0, HOST_WIDE_INT_1U << 18); +constexpr wide_int_bitmask PTA_AMX_TILE (0, HOST_WIDE_INT_1U << 19); +constexpr wide_int_bitmask PTA_AMX_INT8 (0, HOST_WIDE_INT_1U << 20); +constexpr wide_int_bitmask PTA_AMX_BF16 (0, HOST_WIDE_INT_1U << 21); +constexpr wide_int_bitmask PTA_UINTR (0, HOST_WIDE_INT_1U << 22); +constexpr wide_int_bitmask PTA_HRESET (0, HOST_WIDE_INT_1U << 23); +constexpr wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24); +constexpr wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25); +constexpr wide_int_bitmask PTA_AVXVNNI (0, HOST_WIDE_INT_1U << 26); + +constexpr wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR; -const 
wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE & (~PTA_NO_SAHF)) +constexpr wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE + & (~PTA_NO_SAHF)) | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_SSSE3; -const wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2 +constexpr wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2 | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT | PTA_MOVBE | PTA_XSAVE; -const wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3 +constexpr wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3 | PTA_AVX512F | PTA_AVX512BW | PTA_AVX512CD | PTA_AVX512DQ | PTA_AVX512VL; -const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 +constexpr wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR; -const wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 +constexpr wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT; -const wide_int_bitmask PTA_WESTMERE = PTA_NEHALEM | PTA_PCLMUL; -const wide_int_bitmask PTA_SANDYBRIDGE = PTA_WESTMERE | PTA_AVX | PTA_XSAVE +constexpr wide_int_bitmask PTA_WESTMERE = PTA_NEHALEM | PTA_PCLMUL; +constexpr wide_int_bitmask PTA_SANDYBRIDGE = PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT; -const wide_int_bitmask PTA_IVYBRIDGE = PTA_SANDYBRIDGE | PTA_FSGSBASE +constexpr wide_int_bitmask PTA_IVYBRIDGE = PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C; -const wide_int_bitmask PTA_HASWELL = PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI +constexpr wide_int_bitmask PTA_HASWELL = PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT | PTA_FMA | PTA_MOVBE | PTA_HLE; -const wide_int_bitmask PTA_BROADWELL = PTA_HASWELL | PTA_ADX | PTA_RDSEED +constexpr wide_int_bitmask PTA_BROADWELL = PTA_HASWELL | PTA_ADX | PTA_RDSEED | PTA_PRFCHW; -const wide_int_bitmask PTA_SKYLAKE = PTA_BROADWELL | PTA_AES | PTA_CLFLUSHOPT - | PTA_XSAVEC | PTA_XSAVES | PTA_SGX; -const wide_int_bitmask 
PTA_SKYLAKE_AVX512 = PTA_SKYLAKE | PTA_AVX512F +constexpr wide_int_bitmask PTA_SKYLAKE = PTA_BROADWELL | PTA_AES + | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SGX; +constexpr wide_int_bitmask PTA_SKYLAKE_AVX512 = PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU | PTA_CLWB; -const wide_int_bitmask PTA_CASCADELAKE = PTA_SKYLAKE_AVX512 | PTA_AVX512VNNI; -const wide_int_bitmask PTA_COOPERLAKE = PTA_CASCADELAKE | PTA_AVX512BF16; -const wide_int_bitmask PTA_CANNONLAKE = PTA_SKYLAKE | PTA_AVX512F +constexpr wide_int_bitmask PTA_CASCADELAKE = PTA_SKYLAKE_AVX512 + | PTA_AVX512VNNI; +constexpr wide_int_bitmask PTA_COOPERLAKE = PTA_CASCADELAKE | PTA_AVX512BF16; +constexpr wide_int_bitmask PTA_CANNONLAKE = PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA; -const wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI +constexpr wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI | PTA_GFNI | PTA_VAES | PTA_AVX512VBMI2 | PTA_VPCLMULQDQ | PTA_AVX512BITALG | PTA_RDPID | PTA_AVX512VPOPCNTDQ; -const wide_int_bitmask PTA_ICELAKE_SERVER = PTA_ICELAKE_CLIENT | PTA_PCONFIG - | PTA_WBNOINVD | PTA_CLWB; -const wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI +constexpr wide_int_bitmask PTA_ICELAKE_SERVER = PTA_ICELAKE_CLIENT + | PTA_PCONFIG | PTA_WBNOINVD | PTA_CLWB; +constexpr wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT | PTA_KL | PTA_WIDEKL; -const wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI +constexpr wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI; -const wide_int_bitmask PTA_ALDERLAKE 
= PTA_SKYLAKE | PTA_CLDEMOTE | PTA_PTWRITE - | PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; -const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER - | PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1; -const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE; -const wide_int_bitmask PTA_SILVERMONT = PTA_WESTMERE | PTA_MOVBE | PTA_RDRND - | PTA_PRFCHW; -const wide_int_bitmask PTA_GOLDMONT = PTA_SILVERMONT | PTA_AES | PTA_SHA | PTA_XSAVE - | PTA_RDSEED | PTA_XSAVEC | PTA_XSAVES | PTA_CLFLUSHOPT | PTA_XSAVEOPT - | PTA_FSGSBASE; -const wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID +constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_SKYLAKE | PTA_CLDEMOTE + | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET | PTA_KL + | PTA_WIDEKL | PTA_AVXVNNI; +constexpr wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF + | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1; +constexpr wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE; +constexpr wide_int_bitmask PTA_SILVERMONT = PTA_WESTMERE | PTA_MOVBE + | PTA_RDRND | PTA_PRFCHW; +constexpr wide_int_bitmask PTA_GOLDMONT = PTA_SILVERMONT | PTA_AES | PTA_SHA + | PTA_XSAVE | PTA_RDSEED | PTA_XSAVEC | PTA_XSAVES | PTA_CLFLUSHOPT + | PTA_XSAVEOPT | PTA_FSGSBASE; +constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID | PTA_SGX | PTA_PTWRITE; -const wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB +constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG; -const wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW +constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ; #ifndef GENERATOR_FILE diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b60784a..2820f6d 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -5176,7 +5176,7 @@ ;; Load effective address 
instructions -(define_insn_and_split "*lea<mode>" +(define_insn "*lea<mode>" [(set (match_operand:SWI48 0 "register_operand" "=r") (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))] "ix86_hardreg_mov_ok (operands[0], operands[1])" @@ -5189,38 +5189,36 @@ else return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}"; } - "reload_completed && ix86_avoid_lea_for_addr (insn, operands)" + [(set_attr "type" "lea") + (set (attr "mode") + (if_then_else + (match_operand 1 "SImode_address_operand") + (const_string "SI") + (const_string "<MODE>")))]) + +(define_peephole2 + [(set (match_operand:SWI48 0 "register_operand") + (match_operand:SWI48 1 "address_no_seg_operand"))] + "ix86_hardreg_mov_ok (operands[0], operands[1]) + && peep2_regno_dead_p (0, FLAGS_REG) + && ix86_avoid_lea_for_addr (peep2_next_insn (0), operands)" [(const_int 0)] { machine_mode mode = <MODE>mode; - rtx pat; - - /* ix86_avoid_lea_for_addr re-recognizes insn and may - change operands[] array behind our back. */ - pat = PATTERN (curr_insn); - - operands[0] = SET_DEST (pat); - operands[1] = SET_SRC (pat); /* Emit all operations in SImode for zero-extended addresses. */ if (SImode_address_operand (operands[1], VOIDmode)) mode = SImode; - ix86_split_lea_for_addr (curr_insn, operands, mode); + ix86_split_lea_for_addr (peep2_next_insn (0), operands, mode); /* Zero-extend return register to DImode for zero-extended addresses. 
*/ if (mode != <MODE>mode) - emit_insn (gen_zero_extendsidi2 - (operands[0], gen_lowpart (mode, operands[0]))); + emit_insn (gen_zero_extendsidi2 (operands[0], + gen_lowpart (mode, operands[0]))); DONE; -} - [(set_attr "type" "lea") - (set (attr "mode") - (if_then_else - (match_operand 1 "SImode_address_operand") - (const_string "SI") - (const_string "<MODE>")))]) +}) ;; Add instructions @@ -19845,7 +19843,16 @@ (match_operator 3 "commutative_operator" [(match_dup 0) (match_operand 2 "memory_operand")]))] - "REGNO (operands[0]) != REGNO (operands[1])" + "REGNO (operands[0]) != REGNO (operands[1]) + /* Punt if operands[1] is %[xy]mm16+ and AVX512BW is not enabled, + as EVEX encoded vpadd[bw], vpmullw, vpmin[su][bw] and vpmax[su][bw] + instructions require AVX512BW and AVX512VL, but with the original + instructions it might require just AVX512VL. + AVX512VL is implied from TARGET_HARD_REGNO_MODE_OK. */ + && (!EXT_REX_SSE_REGNO_P (REGNO (operands[1])) + || TARGET_AVX512BW + || GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (operands[0]))) > 2 + || logic_operator (operands[3], VOIDmode))" [(set (match_dup 0) (match_dup 2)) (set (match_dup 0) (match_op_dup 3 [(match_dup 0) (match_dup 1)]))]) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 9e5a4d1..c6a2882 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -2021,9 +2021,9 @@ }) (define_insn "mmx_pshufw_1" - [(set (match_operand:V4HI 0 "register_operand" "=y,xYw") + [(set (match_operand:V4HI 0 "register_operand" "=y,Yw") (vec_select:V4HI - (match_operand:V4HI 1 "register_mmxmem_operand" "ym,xYw") + (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yw") (parallel [(match_operand 2 "const_0_to_3_operand") (match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") @@ -2076,6 +2076,17 @@ (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) +;; Optimize V2SImode load from memory, swapping the elements and +;; storing back into the memory into DImode rotate of the 
memory by 32. +(define_split + [(set (match_operand:V2SI 0 "memory_operand") + (vec_select:V2SI (match_dup 0) + (parallel [(const_int 1) (const_int 0)])))] + "TARGET_64BIT && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())" + [(set (match_dup 0) + (rotate:DI (match_dup 0) (const_int 32)))] + "operands[0] = adjust_address (operands[0], DImode, 0);") + (define_insn "mmx_pswapdv2si2" [(set (match_operand:V2SI 0 "register_operand" "=y,Yv") (vec_select:V2SI @@ -2094,10 +2105,10 @@ (set_attr "mode" "DI,TI")]) (define_insn "*vec_dupv4hi" - [(set (match_operand:V4HI 0 "register_operand" "=y,xYw") + [(set (match_operand:V4HI 0 "register_operand" "=y,Yw") (vec_duplicate:V4HI (truncate:HI - (match_operand:SI 1 "register_operand" "0,xYw"))))] + (match_operand:SI 1 "register_operand" "0,Yw"))))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)" "@ diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index ee42ba2..b6dd5e9 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1486,6 +1486,10 @@ (define_predicate "div_operator" (match_code "div")) +;; Return true if this is a and, ior or xor operation. +(define_predicate "logic_operator" + (match_code "and,ior,xor")) + ;; Return true if this is a plus, minus, and, ior or xor operation. 
(define_predicate "plusminuslogic_operator" (match_code "plus,minus,and,ior,xor")) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index db5be59..ca4372d 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -560,6 +560,14 @@ (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f") (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")]) +(define_mode_attr v_Yw + [(V16QI "Yw") (V32QI "Yw") (V64QI "v") + (V8HI "Yw") (V16HI "Yw") (V32HI "v") + (V4SI "v") (V8SI "v") (V16SI "v") + (V2DI "v") (V4DI "v") (V8DI "v") + (V4SF "v") (V8SF "v") (V16SF "v") + (V2DF "v") (V4DF "v") (V8DF "v")]) + (define_mode_attr sse2_avx_avx512f [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f") (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw") @@ -11677,10 +11685,10 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*<insn><mode>3" - [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v") + [(set (match_operand:VI_AVX2 0 "register_operand" "=x,<v_Yw>") (plusminus:VI_AVX2 - (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v") - (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))] + (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,<v_Yw>") + (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,<v_Yw>mBr")))] "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} @@ -11790,9 +11798,9 @@ "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") (define_insn "*mul<mode>3<mask_name>" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") - (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v") - (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))] + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>") + (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>") + (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))] "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2])) && 
<mask_mode512bit_condition> && <mask_avx512bw_condition>" "@ @@ -12618,10 +12626,10 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*avx2_<code><mode>3" - [(set (match_operand:VI124_256 0 "register_operand" "=v") + [(set (match_operand:VI124_256 0 "register_operand" "=<v_Yw>") (maxmin:VI124_256 - (match_operand:VI124_256 1 "nonimmediate_operand" "%v") - (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))] + (match_operand:VI124_256 1 "nonimmediate_operand" "%<v_Yw>") + (match_operand:VI124_256 2 "nonimmediate_operand" "<v_Yw>m")))] "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") @@ -12745,10 +12753,10 @@ }) (define_insn "*sse4_1_<code><mode>3<mask_name>" - [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v") + [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,<v_Yw>") (smaxmin:VI14_128 - (match_operand:VI14_128 1 "vector_operand" "%0,0,v") - (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))] + (match_operand:VI14_128 1 "vector_operand" "%0,0,<v_Yw>") + (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))] "TARGET_SSE4_1 && <mask_mode512bit_condition> && !(MEM_P (operands[1]) && MEM_P (operands[2]))" @@ -12830,10 +12838,10 @@ }) (define_insn "*sse4_1_<code><mode>3<mask_name>" - [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v") + [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,<v_Yw>") (umaxmin:VI24_128 - (match_operand:VI24_128 1 "vector_operand" "%0,0,v") - (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))] + (match_operand:VI24_128 1 "vector_operand" "%0,0,<v_Yw>") + (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))] "TARGET_SSE4_1 && <mask_mode512bit_condition> && !(MEM_P (operands[1]) && MEM_P (operands[2]))" diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c index adc3f36..cc12196 100644 --- 
a/gcc/config/i386/winnt.c +++ b/gcc/config/i386/winnt.c @@ -830,9 +830,20 @@ i386_pe_asm_lto_end (void) struct seh_frame_state { - /* SEH records saves relative to the "current" stack pointer, whether - or not there's a frame pointer in place. This tracks the current - stack pointer offset from the CFA. */ + /* SEH records offsets relative to the lowest address of the fixed stack + allocation. If there is no frame pointer, these offsets are from the + stack pointer; if there is a frame pointer, these offsets are from the + value of the stack pointer when the frame pointer was established, i.e. + the frame pointer minus the offset in the .seh_setframe directive. + + We do not distinguish these two cases, i.e. we consider that the offsets + are always relative to the "current" stack pointer. This means that we + need to perform the fixed stack allocation before establishing the frame + pointer whenever there are registers to be saved, and this is guaranteed + by the prologue provided that we force the frame pointer to point at or + below the lowest used register save area, see ix86_compute_frame_layout. + + This tracks the current stack pointer offset from the CFA. */ HOST_WIDE_INT sp_offset; /* The CFA is located at CFA_REG + CFA_OFFSET. */ diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index ebb04b7..3155459 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -2381,7 +2381,7 @@ mips_symbol_insns (enum mips_symbol_type type, machine_mode mode) { /* MSA LD.* and ST.* cannot support loading symbols via an immediate operand. */ - if (MSA_SUPPORTED_MODE_P (mode)) + if (mode != MAX_MACHINE_MODE && MSA_SUPPORTED_MODE_P (mode)) return 0; return mips_symbol_insns_1 (type, mode) * (TARGET_MIPS16 ? 2 : 1); @@ -8400,7 +8400,7 @@ mips_expand_ext_as_unaligned_load (rtx dest, rtx src, HOST_WIDE_INT width, /* If TARGET_64BIT, the destination of a 32-bit "extz" or "extzv" will be a DImode, create a new temp and emit a zero extend at the end. 
*/ if (GET_MODE (dest) == DImode - && REG_P (dest) + && (REG_P (dest) || (SUBREG_P (dest) && !MEM_P (SUBREG_REG (dest)))) && GET_MODE_BITSIZE (SImode) == width) { dest1 = dest; diff --git a/gcc/config/mn10300/mn10300.c b/gcc/config/mn10300/mn10300.c index bdacade..c1c2e6e 100644 --- a/gcc/config/mn10300/mn10300.c +++ b/gcc/config/mn10300/mn10300.c @@ -2847,9 +2847,10 @@ mn10300_conditional_register_usage (void) with the old cc0-based compiler. */ static rtx_insn * -mn10300_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/, - vec<const char *> &/*constraints*/, - vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) +mn10300_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, + vec<const char *> & /*constraints*/, vec<rtx> &clobbers, + HARD_REG_SET &clobbered_regs) { clobbers.safe_push (gen_rtx_REG (CCmode, CC_REG)); SET_HARD_REG_BIT (clobbered_regs, CC_REG); diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c index 226da0b..7217d78 100644 --- a/gcc/config/nds32/nds32.c +++ b/gcc/config/nds32/nds32.c @@ -4197,6 +4197,7 @@ nds32_option_override (void) static rtx_insn * nds32_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED, vec<rtx> &inputs ATTRIBUTE_UNUSED, + vec<machine_mode> &input_modes ATTRIBUTE_UNUSED, vec<const char *> &constraints ATTRIBUTE_UNUSED, vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) { diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c index 3921b5c..d7fcd11 100644 --- a/gcc/config/pa/pa.c +++ b/gcc/config/pa/pa.c @@ -293,7 +293,7 @@ static size_t n_deferred_plabels = 0; #undef TARGET_ASM_OUTPUT_MI_THUNK #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK -#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true #undef TARGET_ASM_FILE_END #define TARGET_ASM_FILE_END pa_file_end @@ -8461,12 +8461,15 @@ 
pa_is_function_label_plus_const (rtx op) && GET_CODE (XEXP (op, 1)) == CONST_INT); } -/* Output assembly code for a thunk to FUNCTION. */ +/* Output the assembler code for a thunk function. THUNK_DECL is the + declaration for the thunk function itself, FUNCTION is the decl for + the target function. DELTA is an immediate constant offset to be + added to THIS. If VCALL_OFFSET is nonzero, the word at + *(*this + vcall_offset) should be added to THIS. */ static void pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta, - HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, - tree function) + HOST_WIDE_INT vcall_offset, tree function) { const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); static unsigned int current_thunk_number; @@ -8482,201 +8485,386 @@ pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta, assemble_start_function (thunk_fndecl, fnname); final_start_function (emit_barrier (), file, 1); - /* Output the thunk. We know that the function is in the same - translation unit (i.e., the same space) as the thunk, and that - thunks are output after their method. Thus, we don't need an - external branch to reach the function. With SOM and GAS, - functions and thunks are effectively in different sections. - Thus, we can always use a IA-relative branch and the linker - will add a long branch stub if necessary. - - However, we have to be careful when generating PIC code on the - SOM port to ensure that the sequence does not transfer to an - import stub for the target function as this could clobber the - return value saved at SP-24. This would also apply to the - 32-bit linux port if the multi-space model is implemented. 
*/ - if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME - && !(flag_pic && TREE_PUBLIC (function)) - && (TARGET_GAS || last_address < 262132)) - || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME - && ((targetm_common.have_named_sections - && DECL_SECTION_NAME (thunk_fndecl) != NULL - /* The GNU 64-bit linker has rather poor stub management. - So, we use a long branch from thunks that aren't in - the same section as the target function. */ - && ((!TARGET_64BIT - && (DECL_SECTION_NAME (thunk_fndecl) - != DECL_SECTION_NAME (function))) - || ((DECL_SECTION_NAME (thunk_fndecl) - == DECL_SECTION_NAME (function)) - && last_address < 262132))) - /* In this case, we need to be able to reach the start of - the stub table even though the function is likely closer - and can be jumped to directly. */ - || (targetm_common.have_named_sections - && DECL_SECTION_NAME (thunk_fndecl) == NULL - && DECL_SECTION_NAME (function) == NULL - && total_code_bytes < MAX_PCREL17F_OFFSET) - /* Likewise. */ - || (!targetm_common.have_named_sections - && total_code_bytes < MAX_PCREL17F_OFFSET)))) - { - if (!val_14) - output_asm_insn ("addil L'%2,%%r26", xoperands); - - output_asm_insn ("b %0", xoperands); - - if (val_14) - { - output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); - nbytes += 8; + if (!vcall_offset) + { + /* Output the thunk. We know that the function is in the same + translation unit (i.e., the same space) as the thunk, and that + thunks are output after their method. Thus, we don't need an + external branch to reach the function. With SOM and GAS, + functions and thunks are effectively in different sections. + Thus, we can always use a IA-relative branch and the linker + will add a long branch stub if necessary. + + However, we have to be careful when generating PIC code on the + SOM port to ensure that the sequence does not transfer to an + import stub for the target function as this could clobber the + return value saved at SP-24. 
This would also apply to the + 32-bit linux port if the multi-space model is implemented. */ + if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME + && !(flag_pic && TREE_PUBLIC (function)) + && (TARGET_GAS || last_address < 262132)) + || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME + && ((targetm_common.have_named_sections + && DECL_SECTION_NAME (thunk_fndecl) != NULL + /* The GNU 64-bit linker has rather poor stub management. + So, we use a long branch from thunks that aren't in + the same section as the target function. */ + && ((!TARGET_64BIT + && (DECL_SECTION_NAME (thunk_fndecl) + != DECL_SECTION_NAME (function))) + || ((DECL_SECTION_NAME (thunk_fndecl) + == DECL_SECTION_NAME (function)) + && last_address < 262132))) + /* In this case, we need to be able to reach the start of + the stub table even though the function is likely closer + and can be jumped to directly. */ + || (targetm_common.have_named_sections + && DECL_SECTION_NAME (thunk_fndecl) == NULL + && DECL_SECTION_NAME (function) == NULL + && total_code_bytes < MAX_PCREL17F_OFFSET) + /* Likewise. */ + || (!targetm_common.have_named_sections + && total_code_bytes < MAX_PCREL17F_OFFSET)))) + { + if (!val_14) + output_asm_insn ("addil L'%2,%%r26", xoperands); + + output_asm_insn ("b %0", xoperands); + + if (val_14) + { + output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); + nbytes += 8; + } + else + { + output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); + nbytes += 12; + } } - else + else if (TARGET_64BIT) { - output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); - nbytes += 12; - } - } - else if (TARGET_64BIT) - { - rtx xop[4]; + rtx xop[4]; + + /* We only have one call-clobbered scratch register, so we can't + make use of the delay slot if delta doesn't fit in 14 bits. */ + if (!val_14) + { + output_asm_insn ("addil L'%2,%%r26", xoperands); + output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); + } + + /* Load function address into %r1. 
*/ + xop[0] = xoperands[0]; + xop[1] = gen_rtx_REG (Pmode, 1); + xop[2] = xop[1]; + pa_output_pic_pcrel_sequence (xop); - /* We only have one call-clobbered scratch register, so we can't - make use of the delay slot if delta doesn't fit in 14 bits. */ - if (!val_14) + if (val_14) + { + output_asm_insn ("bv %%r0(%%r1)", xoperands); + output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); + nbytes += 20; + } + else + { + output_asm_insn ("bv,n %%r0(%%r1)", xoperands); + nbytes += 24; + } + } + else if (TARGET_PORTABLE_RUNTIME) { - output_asm_insn ("addil L'%2,%%r26", xoperands); - output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); + output_asm_insn ("ldil L'%0,%%r1", xoperands); + output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands); + + if (!val_14) + output_asm_insn ("ldil L'%2,%%r26", xoperands); + + output_asm_insn ("bv %%r0(%%r22)", xoperands); + + if (val_14) + { + output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); + nbytes += 16; + } + else + { + output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands); + nbytes += 20; + } } + else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) + { + /* The function is accessible from outside this module. The only + way to avoid an import stub between the thunk and function is to + call the function directly with an indirect sequence similar to + that used by $$dyncall. This is possible because $$dyncall acts + as the import stub in an indirect call. 
*/ + ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number); + xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label); + output_asm_insn ("addil LT'%3,%%r19", xoperands); + output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands); + output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); + output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); + output_asm_insn ("depi 0,31,2,%%r22", xoperands); + output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands); + output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); + + if (!val_14) + { + output_asm_insn ("addil L'%2,%%r26", xoperands); + nbytes += 4; + } - /* Load function address into %r1. */ - xop[0] = xoperands[0]; - xop[1] = gen_rtx_REG (Pmode, 1); - xop[2] = xop[1]; - pa_output_pic_pcrel_sequence (xop); + if (TARGET_PA_20) + { + output_asm_insn ("bve (%%r22)", xoperands); + nbytes += 36; + } + else if (TARGET_NO_SPACE_REGS) + { + output_asm_insn ("be 0(%%sr4,%%r22)", xoperands); + nbytes += 36; + } + else + { + output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands); + output_asm_insn ("mtsp %%r21,%%sr0", xoperands); + output_asm_insn ("be 0(%%sr0,%%r22)", xoperands); + nbytes += 44; + } - if (val_14) - { - output_asm_insn ("bv %%r0(%%r1)", xoperands); - output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); - nbytes += 20; + if (val_14) + output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); + else + output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); } - else + else if (flag_pic) { - output_asm_insn ("bv,n %%r0(%%r1)", xoperands); - nbytes += 24; - } - } - else if (TARGET_PORTABLE_RUNTIME) - { - output_asm_insn ("ldil L'%0,%%r1", xoperands); - output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands); + rtx xop[4]; - if (!val_14) - output_asm_insn ("ldil L'%2,%%r26", xoperands); + /* Load function address into %r22. 
*/ + xop[0] = xoperands[0]; + xop[1] = gen_rtx_REG (Pmode, 1); + xop[2] = gen_rtx_REG (Pmode, 22); + pa_output_pic_pcrel_sequence (xop); - output_asm_insn ("bv %%r0(%%r22)", xoperands); + if (!val_14) + output_asm_insn ("addil L'%2,%%r26", xoperands); - if (val_14) - { - output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); - nbytes += 16; + output_asm_insn ("bv %%r0(%%r22)", xoperands); + + if (val_14) + { + output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); + nbytes += 20; + } + else + { + output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); + nbytes += 24; + } } else { - output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands); - nbytes += 20; + if (!val_14) + output_asm_insn ("addil L'%2,%%r26", xoperands); + + output_asm_insn ("ldil L'%0,%%r22", xoperands); + output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands); + + if (val_14) + { + output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); + nbytes += 12; + } + else + { + output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); + nbytes += 16; + } } } - else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) + else { - /* The function is accessible from outside this module. The only - way to avoid an import stub between the thunk and function is to - call the function directly with an indirect sequence similar to - that used by $$dyncall. This is possible because $$dyncall acts - as the import stub in an indirect call. */ - ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number); - xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label); - output_asm_insn ("addil LT'%3,%%r19", xoperands); - output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands); - output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); - output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); - output_asm_insn ("depi 0,31,2,%%r22", xoperands); - output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands); - output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); + rtx xop[4]; - if (!val_14) + /* Add DELTA to THIS. 
*/ + if (val_14) { - output_asm_insn ("addil L'%2,%%r26", xoperands); + output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); nbytes += 4; } - - if (TARGET_PA_20) + else { - output_asm_insn ("bve (%%r22)", xoperands); - nbytes += 36; + output_asm_insn ("addil L'%2,%%r26", xoperands); + output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); + nbytes += 8; } - else if (TARGET_NO_SPACE_REGS) + + if (TARGET_64BIT) { - output_asm_insn ("be 0(%%sr4,%%r22)", xoperands); - nbytes += 36; + /* Load *(THIS + DELTA) to %r1. */ + output_asm_insn ("ldd 0(%%r26),%%r1", xoperands); + + val_14 = VAL_14_BITS_P (vcall_offset); + xoperands[2] = GEN_INT (vcall_offset); + + /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */ + if (val_14) + { + output_asm_insn ("ldd %2(%%r1),%%r1", xoperands); + nbytes += 8; + } + else + { + output_asm_insn ("addil L'%2,%%r1", xoperands); + output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands); + nbytes += 12; + } } else { - output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands); - output_asm_insn ("mtsp %%r21,%%sr0", xoperands); - output_asm_insn ("be 0(%%sr0,%%r22)", xoperands); - nbytes += 44; - } + /* Load *(THIS + DELTA) to %r1. */ + output_asm_insn ("ldw 0(%%r26),%%r1", xoperands); - if (val_14) - output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); - else - output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); - } - else if (flag_pic) - { - rtx xop[4]; - - /* Load function address into %r22. */ - xop[0] = xoperands[0]; - xop[1] = gen_rtx_REG (Pmode, 1); - xop[2] = gen_rtx_REG (Pmode, 22); - pa_output_pic_pcrel_sequence (xop); + val_14 = VAL_14_BITS_P (vcall_offset); + xoperands[2] = GEN_INT (vcall_offset); - if (!val_14) - output_asm_insn ("addil L'%2,%%r26", xoperands); - - output_asm_insn ("bv %%r0(%%r22)", xoperands); + /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. 
*/ + if (val_14) + { + output_asm_insn ("ldw %2(%%r1),%%r1", xoperands); + nbytes += 8; + } + else + { + output_asm_insn ("addil L'%2,%%r1", xoperands); + output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands); + nbytes += 12; + } + } - if (val_14) + /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible. */ + if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME + && !(flag_pic && TREE_PUBLIC (function)) + && (TARGET_GAS || last_address < 262132)) + || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME + && ((targetm_common.have_named_sections + && DECL_SECTION_NAME (thunk_fndecl) != NULL + /* The GNU 64-bit linker has rather poor stub management. + So, we use a long branch from thunks that aren't in + the same section as the target function. */ + && ((!TARGET_64BIT + && (DECL_SECTION_NAME (thunk_fndecl) + != DECL_SECTION_NAME (function))) + || ((DECL_SECTION_NAME (thunk_fndecl) + == DECL_SECTION_NAME (function)) + && last_address < 262132))) + /* In this case, we need to be able to reach the start of + the stub table even though the function is likely closer + and can be jumped to directly. */ + || (targetm_common.have_named_sections + && DECL_SECTION_NAME (thunk_fndecl) == NULL + && DECL_SECTION_NAME (function) == NULL + && total_code_bytes < MAX_PCREL17F_OFFSET) + /* Likewise. */ + || (!targetm_common.have_named_sections + && total_code_bytes < MAX_PCREL17F_OFFSET)))) { - output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); - nbytes += 20; + nbytes += 4; + output_asm_insn ("b %0", xoperands); + + /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */ + output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands); } - else + else if (TARGET_64BIT) { - output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); - nbytes += 24; - } - } - else - { - if (!val_14) - output_asm_insn ("addil L'%2,%%r26", xoperands); + /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. 
*/ + output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands); - output_asm_insn ("ldil L'%0,%%r22", xoperands); - output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands); + /* Load function address into %r1. */ + nbytes += 16; + xop[0] = xoperands[0]; + xop[1] = gen_rtx_REG (Pmode, 1); + xop[2] = xop[1]; + pa_output_pic_pcrel_sequence (xop); - if (val_14) + output_asm_insn ("bv,n %%r0(%%r1)", xoperands); + } + else if (TARGET_PORTABLE_RUNTIME) { - output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); + /* Load function address into %r22. */ nbytes += 12; + output_asm_insn ("ldil L'%0,%%r22", xoperands); + output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands); + + output_asm_insn ("bv %%r0(%%r22)", xoperands); + + /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */ + output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands); + } + else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) + { + /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */ + output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands); + + /* The function is accessible from outside this module. The only + way to avoid an import stub between the thunk and function is to + call the function directly with an indirect sequence similar to + that used by $$dyncall. This is possible because $$dyncall acts + as the import stub in an indirect call. 
*/ + ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number); + xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label); + output_asm_insn ("addil LT'%3,%%r19", xoperands); + output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands); + output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); + output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); + output_asm_insn ("depi 0,31,2,%%r22", xoperands); + output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands); + output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); + + if (TARGET_PA_20) + { + output_asm_insn ("bve,n (%%r22)", xoperands); + nbytes += 32; + } + else if (TARGET_NO_SPACE_REGS) + { + output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands); + nbytes += 32; + } + else + { + output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands); + output_asm_insn ("mtsp %%r21,%%sr0", xoperands); + output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands); + nbytes += 40; + } } - else + else if (flag_pic) { - output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); + /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */ + output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands); + + /* Load function address into %r1. */ nbytes += 16; + xop[0] = xoperands[0]; + xop[1] = gen_rtx_REG (Pmode, 1); + xop[2] = xop[1]; + pa_output_pic_pcrel_sequence (xop); + + output_asm_insn ("bv,n %%r0(%%r1)", xoperands); + } + else + { + /* Load function address into %r22. */ + nbytes += 8; + output_asm_insn ("ldil L'%0,%%r22", xoperands); + output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands); + + /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. 
*/ + output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands); } } diff --git a/gcc/config/pdp11/pdp11.c b/gcc/config/pdp11/pdp11.c index bd6e0dc..eb3bea4 100644 --- a/gcc/config/pdp11/pdp11.c +++ b/gcc/config/pdp11/pdp11.c @@ -155,7 +155,7 @@ static bool pdp11_rtx_costs (rtx, machine_mode, int, int, int *, bool); static int pdp11_addr_cost (rtx, machine_mode, addr_space_t, bool); static int pdp11_insn_cost (rtx_insn *insn, bool speed); static rtx_insn *pdp11_md_asm_adjust (vec<rtx> &, vec<rtx> &, - vec<const char *> &, + vec<machine_mode> &, vec<const char *> &, vec<rtx> &, HARD_REG_SET &); static bool pdp11_return_in_memory (const_tree, const_tree); static rtx pdp11_function_value (const_tree, const_tree, bool); @@ -2139,9 +2139,10 @@ pdp11_cmp_length (rtx *operands, int words) compiler. */ static rtx_insn * -pdp11_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/, - vec<const char *> &/*constraints*/, - vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) +pdp11_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, + vec<const char *> & /*constraints*/, vec<rtx> &clobbers, + HARD_REG_SET &clobbered_regs) { clobbers.safe_push (gen_rtx_REG (CCmode, CC_REGNUM)); SET_HARD_REG_BIT (clobbered_regs, CC_REGNUM); diff --git a/gcc/config/riscv/riscv-shorten-memrefs.c b/gcc/config/riscv/riscv-shorten-memrefs.c index b1b57f1..3f34065 100644 --- a/gcc/config/riscv/riscv-shorten-memrefs.c +++ b/gcc/config/riscv/riscv-shorten-memrefs.c @@ -75,12 +75,19 @@ private: regno_map * analyze (basic_block bb); void transform (regno_map *m, basic_block bb); - bool get_si_mem_base_reg (rtx mem, rtx *addr); + bool get_si_mem_base_reg (rtx mem, rtx *addr, bool *extend); }; // class pass_shorten_memrefs bool -pass_shorten_memrefs::get_si_mem_base_reg (rtx mem, rtx *addr) +pass_shorten_memrefs::get_si_mem_base_reg (rtx mem, rtx *addr, bool *extend) { + /* Whether it's sign/zero extended. 
*/ + if (GET_CODE (mem) == ZERO_EXTEND || GET_CODE (mem) == SIGN_EXTEND) + { + *extend = true; + mem = XEXP (mem, 0); + } + if (!MEM_P (mem) || GET_MODE (mem) != SImode) return false; *addr = XEXP (mem, 0); @@ -110,7 +117,8 @@ pass_shorten_memrefs::analyze (basic_block bb) { rtx mem = XEXP (pat, i); rtx addr; - if (get_si_mem_base_reg (mem, &addr)) + bool extend = false; + if (get_si_mem_base_reg (mem, &addr, &extend)) { HOST_WIDE_INT regno = REGNO (XEXP (addr, 0)); /* Do not count store zero as these cannot be compressed. */ @@ -150,7 +158,8 @@ pass_shorten_memrefs::transform (regno_map *m, basic_block bb) { rtx mem = XEXP (pat, i); rtx addr; - if (get_si_mem_base_reg (mem, &addr)) + bool extend = false; + if (get_si_mem_base_reg (mem, &addr, &extend)) { HOST_WIDE_INT regno = REGNO (XEXP (addr, 0)); /* Do not transform store zero as these cannot be compressed. */ @@ -161,9 +170,20 @@ pass_shorten_memrefs::transform (regno_map *m, basic_block bb) } if (m->get_or_insert (regno) > 3) { - addr - = targetm.legitimize_address (addr, addr, GET_MODE (mem)); - XEXP (pat, i) = replace_equiv_address (mem, addr); + if (extend) + { + addr + = targetm.legitimize_address (addr, addr, + GET_MODE (XEXP (mem, 0))); + XEXP (XEXP (pat, i), 0) + = replace_equiv_address (XEXP (mem, 0), addr); + } + else + { + addr = targetm.legitimize_address (addr, addr, + GET_MODE (mem)); + XEXP (pat, i) = replace_equiv_address (mem, addr); + } df_insn_rescan (insn); } } diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c index ff41795..fffd081 100644 --- a/gcc/config/riscv/riscv.c +++ b/gcc/config/riscv/riscv.c @@ -891,17 +891,13 @@ riscv_compressed_lw_address_p (rtx x) bool result = riscv_classify_address (&addr, x, GET_MODE (x), reload_completed); - /* Before reload, assuming all load/stores of valid addresses get compressed - gives better code size than checking if the address is reg + small_offset - early on. 
*/ - if (result && !reload_completed) - return true; - /* Return false if address is not compressed_reg + small_offset. */ if (!result || addr.type != ADDRESS_REG - || (!riscv_compressed_reg_p (REGNO (addr.reg)) - && addr.reg != stack_pointer_rtx) + /* Before reload, assume all registers are OK. */ + || (reload_completed + && !riscv_compressed_reg_p (REGNO (addr.reg)) + && addr.reg != stack_pointer_rtx) || !riscv_compressed_lw_offset_p (addr.offset)) return false; @@ -1528,6 +1524,28 @@ riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src) bool riscv_legitimize_move (machine_mode mode, rtx dest, rtx src) { + /* Expand + (set (reg:QI target) (mem:QI (address))) + to + (set (reg:DI temp) (zero_extend:DI (mem:QI (address)))) + (set (reg:QI target) (subreg:QI (reg:DI temp) 0)) + with auto-sign/zero extend. */ + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < UNITS_PER_WORD + && can_create_pseudo_p () + && MEM_P (src)) + { + rtx temp_reg; + int zero_extend_p; + + temp_reg = gen_reg_rtx (word_mode); + zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND); + emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode, + zero_extend_p)); + riscv_emit_move (dest, gen_lowpart (mode, temp_reg)); + return true; + } + if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode)) { rtx reg; @@ -1708,6 +1726,13 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN instructions it needs. */ if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0) { + /* When optimizing for size, make uncompressible 32-bit addresses + more expensive so that compressible 32-bit addresses are + preferred. 
*/ + if (TARGET_RVC && !speed && riscv_mshorten_memrefs && mode == SImode + && !riscv_compressed_lw_address_p (XEXP (x, 0))) + cost++; + *total = COSTS_N_INSNS (cost + tune_param->memory_cost); return true; } diff --git a/gcc/config/rs6000/aix.h b/gcc/config/rs6000/aix.h index 5e8743a..2db50c8 100644 --- a/gcc/config/rs6000/aix.h +++ b/gcc/config/rs6000/aix.h @@ -224,7 +224,8 @@ /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */ #define ADJUST_FIELD_ALIGN(FIELD, TYPE, COMPUTED) \ ((TARGET_ALIGN_NATURAL == 0 \ - && TYPE_MODE (strip_array_types (TYPE)) == DFmode) \ + && (TYPE_MODE (strip_array_types (TYPE)) == DFmode \ + || TYPE_MODE (strip_array_types (TYPE)) == DCmode)) \ ? MIN ((COMPUTED), 32) \ : (COMPUTED)) diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md index c8cdb64..026be5d 100644 --- a/gcc/config/rs6000/dfp.md +++ b/gcc/config/rs6000/dfp.md @@ -139,7 +139,8 @@ (float_extend:TD (match_operand:DD 1 "gpc_reg_operand" "d")))] "TARGET_DFP" "dctqpq %0,%1" - [(set_attr "type" "dfp")]) + [(set_attr "type" "dfp") + (set_attr "size" "128")]) ;; The result of drdpq is an even/odd register pair with the converted ;; value in the even register and zero in the odd register. @@ -153,6 +154,7 @@ "TARGET_DFP" "drdpq %2,%1\;fmr %0,%2" [(set_attr "type" "dfp") + (set_attr "size" "128") (set_attr "length" "8")]) (define_insn "trunctdsd2" @@ -206,7 +208,8 @@ (match_operand:DDTD 2 "gpc_reg_operand" "d")))] "TARGET_DFP" "dcmpu<q> %0,%1,%2" - [(set_attr "type" "dfp")]) + [(set_attr "type" "dfp") + (set_attr "size" "<bits>")]) (define_insn "floatdidd2" [(set (match_operand:DD 0 "gpc_reg_operand" "=d") @@ -220,7 +223,8 @@ (float:TD (match_operand:DI 1 "gpc_reg_operand" "d")))] "TARGET_DFP" "dcffixq %0,%1" - [(set_attr "type" "dfp")]) + [(set_attr "type" "dfp") + (set_attr "size" "128")]) ;; Convert a decimal64/128 to a decimal64/128 whose value is an integer. ;; This is the first stage of converting it to an integer type. 
@@ -230,7 +234,8 @@ (fix:DDTD (match_operand:DDTD 1 "gpc_reg_operand" "d")))] "TARGET_DFP" "drintn<q>. 0,%0,%1,1" - [(set_attr "type" "dfp")]) + [(set_attr "type" "dfp") + (set_attr "size" "<bits>")]) ;; Convert a decimal64/128 whose value is an integer to an actual integer. ;; This is the second stage of converting decimal float to integer type. @@ -240,7 +245,8 @@ (fix:DI (match_operand:DDTD 1 "gpc_reg_operand" "d")))] "TARGET_DFP" "dctfix<q> %0,%1" - [(set_attr "type" "dfp")]) + [(set_attr "type" "dfp") + (set_attr "size" "<bits>")]) ;; Decimal builtin support @@ -262,7 +268,8 @@ UNSPEC_DDEDPD))] "TARGET_DFP" "ddedpd<q> %1,%0,%2" - [(set_attr "type" "dfp")]) + [(set_attr "type" "dfp") + (set_attr "size" "<bits>")]) (define_insn "dfp_denbcd_<mode>" [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d") @@ -271,7 +278,8 @@ UNSPEC_DENBCD))] "TARGET_DFP" "denbcd<q> %1,%0,%2" - [(set_attr "type" "dfp")]) + [(set_attr "type" "dfp") + (set_attr "size" "<bits>")]) (define_insn "dfp_denbcd_v16qi_inst" [(set (match_operand:TD 0 "gpc_reg_operand" "=d") @@ -301,7 +309,8 @@ UNSPEC_DXEX))] "TARGET_DFP" "dxex<q> %0,%1" - [(set_attr "type" "dfp")]) + [(set_attr "type" "dfp") + (set_attr "size" "<bits>")]) (define_insn "dfp_diex_<mode>" [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d") @@ -310,7 +319,8 @@ UNSPEC_DXEX))] "TARGET_DFP" "diex<q> %0,%1,%2" - [(set_attr "type" "dfp")]) + [(set_attr "type" "dfp") + (set_attr "size" "<bits>")]) (define_expand "dfptstsfi_<code>_<mode>" [(set (match_dup 3) @@ -349,7 +359,8 @@ operands[1] = GEN_INT (63); return "dtstsfi<q> %0,%1,%2"; } - [(set_attr "type" "fp")]) + [(set_attr "type" "fp") + (set_attr "size" "<bits>")]) (define_insn "dfp_dscli_<mode>" [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d") @@ -358,7 +369,8 @@ UNSPEC_DSCLI))] "TARGET_DFP" "dscli<q> %0,%1,%2" - [(set_attr "type" "dfp")]) + [(set_attr "type" "dfp") + (set_attr "size" "<bits>")]) (define_insn "dfp_dscri_<mode>" [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d") @@ 
-367,4 +379,5 @@ UNSPEC_DSCRI))] "TARGET_DFP" "dscri<q> %0,%1,%2" - [(set_attr "type" "dfp")]) + [(set_attr "type" "dfp") + (set_attr "size" "<bits>")]) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 737a6da..56478fc 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -1,7 +1,6 @@ -;; -*- buffer-read-only: t -*- ;; Generated automatically by genfusion.pl -;; Copyright (C) 2020 Free Software Foundation, Inc. +;; Copyright (C) 2020,2021 Free Software Foundation, Inc. ;; ;; This file is part of GCC. ;; @@ -23,18 +22,18 @@ ;; load mode is DI result mode is clobber compare mode is CC extend is none (define_insn_and_split "*ld_cmpdi_cr0_DI_clobber_CC_none" [(set (match_operand:CC 2 "cc_reg_operand" "=x") - (compare:CC (match_operand:DI 1 "non_update_memory_operand" "m") - (match_operand:DI 3 "const_m1_to_1_operand" "n"))) + (compare:CC (match_operand:DI 1 "ds_form_mem_operand" "m") + (match_operand:DI 3 "const_m1_to_1_operand" "n"))) (clobber (match_scratch:DI 0 "=r"))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "ld%X1 %0,%1\;cmpdi 0,%0,%3" + "ld%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + DImode, NON_PREFIXED_DS))" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) - (compare:CC (match_dup 0) - (match_dup 3)))] + (compare:CC (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -44,18 +43,18 @@ ;; load mode is DI result mode is clobber compare mode is CCUNS extend is none (define_insn_and_split "*ld_cmpldi_cr0_DI_clobber_CCUNS_none" [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x") - (compare:CCUNS (match_operand:DI 1 "non_update_memory_operand" "m") - (match_operand:DI 3 "const_0_to_1_operand" "n"))) + (compare:CCUNS (match_operand:DI 1 "ds_form_mem_operand" "m") + 
(match_operand:DI 3 "const_0_to_1_operand" "n"))) (clobber (match_scratch:DI 0 "=r"))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "ld%X1 %0,%1\;cmpldi 0,%0,%3" + "ld%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + DImode, NON_PREFIXED_DS))" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) - (compare:CCUNS (match_dup 0) - (match_dup 3)))] + (compare:CCUNS (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -65,18 +64,18 @@ ;; load mode is DI result mode is DI compare mode is CC extend is none (define_insn_and_split "*ld_cmpdi_cr0_DI_DI_CC_none" [(set (match_operand:CC 2 "cc_reg_operand" "=x") - (compare:CC (match_operand:DI 1 "non_update_memory_operand" "m") - (match_operand:DI 3 "const_m1_to_1_operand" "n"))) + (compare:CC (match_operand:DI 1 "ds_form_mem_operand" "m") + (match_operand:DI 3 "const_m1_to_1_operand" "n"))) (set (match_operand:DI 0 "gpc_reg_operand" "=r") (match_dup 1))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "ld%X1 %0,%1\;cmpdi 0,%0,%3" + "ld%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + DImode, NON_PREFIXED_DS))" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) - (compare:CC (match_dup 0) - (match_dup 3)))] + (compare:CC (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -86,18 +85,18 @@ ;; load mode is DI result mode is DI compare mode is CCUNS extend is none (define_insn_and_split "*ld_cmpldi_cr0_DI_DI_CCUNS_none" [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x") - (compare:CCUNS (match_operand:DI 1 "non_update_memory_operand" "m") - (match_operand:DI 3 
"const_0_to_1_operand" "n"))) + (compare:CCUNS (match_operand:DI 1 "ds_form_mem_operand" "m") + (match_operand:DI 3 "const_0_to_1_operand" "n"))) (set (match_operand:DI 0 "gpc_reg_operand" "=r") (match_dup 1))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "ld%X1 %0,%1\;cmpldi 0,%0,%3" + "ld%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + DImode, NON_PREFIXED_DS))" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) - (compare:CCUNS (match_dup 0) - (match_dup 3)))] + (compare:CCUNS (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -107,18 +106,18 @@ ;; load mode is SI result mode is clobber compare mode is CC extend is none (define_insn_and_split "*lwa_cmpdi_cr0_SI_clobber_CC_none" [(set (match_operand:CC 2 "cc_reg_operand" "=x") - (compare:CC (match_operand:SI 1 "non_update_memory_operand" "m") - (match_operand:SI 3 "const_m1_to_1_operand" "n"))) + (compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m") + (match_operand:SI 3 "const_m1_to_1_operand" "n"))) (clobber (match_scratch:SI 0 "=r"))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "lwa%X1 %0,%1\;cmpdi 0,%0,%3" + "lwa%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), SImode, NON_PREFIXED_DS))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + SImode, NON_PREFIXED_DS))" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) - (compare:CC (match_dup 0) - (match_dup 3)))] + (compare:CC (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -129,17 +128,17 @@ (define_insn_and_split "*lwz_cmpldi_cr0_SI_clobber_CCUNS_none" [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x") (compare:CCUNS (match_operand:SI 1 "non_update_memory_operand" "m") 
- (match_operand:SI 3 "const_0_to_1_operand" "n"))) + (match_operand:SI 3 "const_0_to_1_operand" "n"))) (clobber (match_scratch:SI 0 "=r"))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "lwz%X1 %0,%1\;cmpldi 0,%0,%3" + "lwz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), SImode, NON_PREFIXED_D))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + SImode, NON_PREFIXED_D))" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) - (compare:CCUNS (match_dup 0) - (match_dup 3)))] + (compare:CCUNS (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -149,18 +148,18 @@ ;; load mode is SI result mode is SI compare mode is CC extend is none (define_insn_and_split "*lwa_cmpdi_cr0_SI_SI_CC_none" [(set (match_operand:CC 2 "cc_reg_operand" "=x") - (compare:CC (match_operand:SI 1 "non_update_memory_operand" "m") - (match_operand:SI 3 "const_m1_to_1_operand" "n"))) + (compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m") + (match_operand:SI 3 "const_m1_to_1_operand" "n"))) (set (match_operand:SI 0 "gpc_reg_operand" "=r") (match_dup 1))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "lwa%X1 %0,%1\;cmpdi 0,%0,%3" + "lwa%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), SImode, NON_PREFIXED_DS))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + SImode, NON_PREFIXED_DS))" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) - (compare:CC (match_dup 0) - (match_dup 3)))] + (compare:CC (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -171,17 +170,17 @@ (define_insn_and_split "*lwz_cmpldi_cr0_SI_SI_CCUNS_none" [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x") (compare:CCUNS (match_operand:SI 1 "non_update_memory_operand" "m") - (match_operand:SI 3 "const_0_to_1_operand" "n"))) + 
(match_operand:SI 3 "const_0_to_1_operand" "n"))) (set (match_operand:SI 0 "gpc_reg_operand" "=r") (match_dup 1))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "lwz%X1 %0,%1\;cmpldi 0,%0,%3" + "lwz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), SImode, NON_PREFIXED_D))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + SImode, NON_PREFIXED_D))" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) - (compare:CCUNS (match_dup 0) - (match_dup 3)))] + (compare:CCUNS (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -191,18 +190,18 @@ ;; load mode is SI result mode is EXTSI compare mode is CC extend is sign (define_insn_and_split "*lwa_cmpdi_cr0_SI_EXTSI_CC_sign" [(set (match_operand:CC 2 "cc_reg_operand" "=x") - (compare:CC (match_operand:SI 1 "non_update_memory_operand" "m") - (match_operand:SI 3 "const_m1_to_1_operand" "n"))) + (compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m") + (match_operand:SI 3 "const_m1_to_1_operand" "n"))) (set (match_operand:EXTSI 0 "gpc_reg_operand" "=r") (sign_extend:EXTSI (match_dup 1)))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "lwa%X1 %0,%1\;cmpdi 0,%0,%3" + "lwa%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), SImode, NON_PREFIXED_DS))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + SImode, NON_PREFIXED_DS))" [(set (match_dup 0) (sign_extend:EXTSI (match_dup 1))) (set (match_dup 2) - (compare:CC (match_dup 0) - (match_dup 3)))] + (compare:CC (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -213,17 +212,17 @@ (define_insn_and_split "*lwz_cmpldi_cr0_SI_EXTSI_CCUNS_zero" [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x") (compare:CCUNS (match_operand:SI 1 "non_update_memory_operand" "m") - (match_operand:SI 3 
"const_0_to_1_operand" "n"))) + (match_operand:SI 3 "const_0_to_1_operand" "n"))) (set (match_operand:EXTSI 0 "gpc_reg_operand" "=r") (zero_extend:EXTSI (match_dup 1)))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "lwz%X1 %0,%1\;cmpldi 0,%0,%3" + "lwz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), SImode, NON_PREFIXED_D))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + SImode, NON_PREFIXED_D))" [(set (match_dup 0) (zero_extend:EXTSI (match_dup 1))) (set (match_dup 2) - (compare:CCUNS (match_dup 0) - (match_dup 3)))] + (compare:CCUNS (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -234,17 +233,17 @@ (define_insn_and_split "*lha_cmpdi_cr0_HI_clobber_CC_sign" [(set (match_operand:CC 2 "cc_reg_operand" "=x") (compare:CC (match_operand:HI 1 "non_update_memory_operand" "m") - (match_operand:HI 3 "const_m1_to_1_operand" "n"))) + (match_operand:HI 3 "const_m1_to_1_operand" "n"))) (clobber (match_scratch:GPR 0 "=r"))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "lha%X1 %0,%1\;cmpdi 0,%0,%3" + "lha%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), HImode, NON_PREFIXED_D))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + HImode, NON_PREFIXED_D))" [(set (match_dup 0) (sign_extend:GPR (match_dup 1))) (set (match_dup 2) - (compare:CC (match_dup 0) - (match_dup 3)))] + (compare:CC (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -255,17 +254,17 @@ (define_insn_and_split "*lhz_cmpldi_cr0_HI_clobber_CCUNS_zero" [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x") (compare:CCUNS (match_operand:HI 1 "non_update_memory_operand" "m") - (match_operand:HI 3 "const_0_to_1_operand" "n"))) + (match_operand:HI 3 "const_0_to_1_operand" "n"))) (clobber (match_scratch:GPR 0 "=r"))] 
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "lhz%X1 %0,%1\;cmpldi 0,%0,%3" + "lhz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), HImode, NON_PREFIXED_D))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + HImode, NON_PREFIXED_D))" [(set (match_dup 0) (zero_extend:GPR (match_dup 1))) (set (match_dup 2) - (compare:CCUNS (match_dup 0) - (match_dup 3)))] + (compare:CCUNS (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -276,17 +275,17 @@ (define_insn_and_split "*lha_cmpdi_cr0_HI_EXTHI_CC_sign" [(set (match_operand:CC 2 "cc_reg_operand" "=x") (compare:CC (match_operand:HI 1 "non_update_memory_operand" "m") - (match_operand:HI 3 "const_m1_to_1_operand" "n"))) + (match_operand:HI 3 "const_m1_to_1_operand" "n"))) (set (match_operand:EXTHI 0 "gpc_reg_operand" "=r") (sign_extend:EXTHI (match_dup 1)))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "lha%X1 %0,%1\;cmpdi 0,%0,%3" + "lha%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), HImode, NON_PREFIXED_D))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + HImode, NON_PREFIXED_D))" [(set (match_dup 0) (sign_extend:EXTHI (match_dup 1))) (set (match_dup 2) - (compare:CC (match_dup 0) - (match_dup 3)))] + (compare:CC (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -297,17 +296,17 @@ (define_insn_and_split "*lhz_cmpldi_cr0_HI_EXTHI_CCUNS_zero" [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x") (compare:CCUNS (match_operand:HI 1 "non_update_memory_operand" "m") - (match_operand:HI 3 "const_0_to_1_operand" "n"))) + (match_operand:HI 3 "const_0_to_1_operand" "n"))) (set (match_operand:EXTHI 0 "gpc_reg_operand" "=r") (zero_extend:EXTHI (match_dup 1)))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "lhz%X1 %0,%1\;cmpldi 
0,%0,%3" + "lhz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), HImode, NON_PREFIXED_D))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + HImode, NON_PREFIXED_D))" [(set (match_dup 0) (zero_extend:EXTHI (match_dup 1))) (set (match_dup 2) - (compare:CCUNS (match_dup 0) - (match_dup 3)))] + (compare:CCUNS (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -318,17 +317,17 @@ (define_insn_and_split "*lbz_cmpldi_cr0_QI_clobber_CCUNS_zero" [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x") (compare:CCUNS (match_operand:QI 1 "non_update_memory_operand" "m") - (match_operand:QI 3 "const_0_to_1_operand" "n"))) + (match_operand:QI 3 "const_0_to_1_operand" "n"))) (clobber (match_scratch:GPR 0 "=r"))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "lbz%X1 %0,%1\;cmpldi 0,%0,%3" + "lbz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) - || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), QImode, NON_PREFIXED_D))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + QImode, NON_PREFIXED_D))" [(set (match_dup 0) (zero_extend:GPR (match_dup 1))) (set (match_dup 2) - (compare:CCUNS (match_dup 0) - (match_dup 3)))] + (compare:CCUNS (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") @@ -339,17 +338,17 @@ (define_insn_and_split "*lbz_cmpldi_cr0_QI_GPR_CCUNS_zero" [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x") (compare:CCUNS (match_operand:QI 1 "non_update_memory_operand" "m") - (match_operand:QI 3 "const_0_to_1_operand" "n"))) + (match_operand:QI 3 "const_0_to_1_operand" "n"))) (set (match_operand:GPR 0 "gpc_reg_operand" "=r") (zero_extend:GPR (match_dup 1)))] "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" - "lbz%X1 %0,%1\;cmpldi 0,%0,%3" + "lbz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) 
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), QImode, NON_PREFIXED_D))" + || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0), + QImode, NON_PREFIXED_D))" [(set (match_dup 0) (zero_extend:GPR (match_dup 1))) (set (match_dup 2) - (compare:CCUNS (match_dup 0) - (match_dup 3)))] + (compare:CCUNS (match_dup 0) (match_dup 3)))] "" [(set_attr "type" "load") (set_attr "cost" "8") diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index e1c45f5..c86c743 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -56,7 +56,7 @@ sub mode_to_ldst_char sub gen_ld_cmpi_p10 { my ($lmode, $ldst, $clobbermode, $result, $cmpl, $echr, $constpred, - $ccmode, $np, $extend, $resultmode); + $mempred, $ccmode, $np, $extend, $resultmode); LMODE: foreach $lmode ('DI','SI','HI','QI') { $ldst = mode_to_ldst_char($lmode); $clobbermode = $lmode; @@ -70,11 +70,13 @@ sub gen_ld_cmpi_p10 $result = "GPR" if $result eq "EXTQI"; CCMODE: foreach $ccmode ('CC','CCUNS') { $np = "NON_PREFIXED_D"; + $mempred = "non_update_memory_operand"; if ( $ccmode eq 'CC' ) { next CCMODE if $lmode eq 'QI'; if ( $lmode eq 'DI' || $lmode eq 'SI' ) { # ld and lwa are both DS-FORM. $np = "NON_PREFIXED_DS"; + $mempred = "ds_form_mem_operand"; } $cmpl = ""; $echr = "a"; @@ -83,6 +85,7 @@ sub gen_ld_cmpi_p10 if ( $lmode eq 'DI' ) { # ld is DS-form, but lwz is not. 
$np = "NON_PREFIXED_DS"; + $mempred = "ds_form_mem_operand"; } $cmpl = "l"; $echr = "z"; @@ -105,7 +108,7 @@ sub gen_ld_cmpi_p10 print "(define_insn_and_split \"*l${ldst}${echr}_cmp${cmpl}di_cr0_${lmode}_${result}_${ccmode}_${extend}\"\n"; print " [(set (match_operand:${ccmode} 2 \"cc_reg_operand\" \"=x\")\n"; - print " (compare:${ccmode} (match_operand:${lmode} 1 \"non_update_memory_operand\" \"m\")\n"; + print " (compare:${ccmode} (match_operand:${lmode} 1 \"${mempred}\" \"m\")\n"; if ($ccmode eq 'CCUNS') { print " "; } print " (match_operand:${lmode} 3 \"${constpred}\" \"n\")))\n"; if ($result eq 'clobber') { diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 87569f1..a00d3a3 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -288,6 +288,7 @@ DONE; } [(set_attr "type" "vecload,vecstore,veclogical") + (set_attr "size" "256") (set_attr "length" "*,*,8")]) @@ -321,7 +322,7 @@ (set_attr "length" "*,*,16") (set_attr "max_prefixed_insns" "2,2,*")]) -(define_expand "mma_assemble_pair" +(define_expand "vsx_assemble_pair" [(match_operand:OO 0 "vsx_register_operand") (match_operand:V16QI 1 "mma_assemble_input_operand") (match_operand:V16QI 2 "mma_assemble_input_operand")] @@ -334,7 +335,7 @@ DONE; }) -(define_insn_and_split "*mma_assemble_pair" +(define_insn_and_split "*vsx_assemble_pair" [(set (match_operand:OO 0 "vsx_register_operand" "=wa") (unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa") (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")] @@ -351,7 +352,7 @@ DONE; }) -(define_expand "mma_disassemble_pair" +(define_expand "vsx_disassemble_pair" [(match_operand:V16QI 0 "mma_disassemble_output_operand") (match_operand:OO 1 "vsx_register_operand") (match_operand 2 "const_0_to_1_operand")] @@ -366,7 +367,7 @@ DONE; }) -(define_insn_and_split "*mma_disassemble_pair" +(define_insn_and_split "*vsx_disassemble_pair" [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa") (unspec:V16QI 
[(match_operand:OO 1 "vsx_register_operand" "wa") (match_operand 2 "const_0_to_1_operand")] diff --git a/gcc/config/rs6000/pcrel-opt.md b/gcc/config/rs6000/pcrel-opt.md new file mode 100644 index 0000000..9706a39 --- /dev/null +++ b/gcc/config/rs6000/pcrel-opt.md @@ -0,0 +1,401 @@ +;; Machine description for the PCREL_OPT optimization. +;; Copyright (C) 2020-2021 Free Software Foundation, Inc. +;; Contributed by Michael Meissner (meissner@linux.ibm.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Support for the PCREL_OPT optimization. PCREL_OPT looks for instances where +;; an external variable is used only once, either for reading or for writing. +;; +;; If we are optimizing a single read, normally the code would look like: +;; +;; (set (reg:DI <ptr>) +;; (symbol_ref:DI "<extern_addr>")) # <data> is currently dead +;; +;; ... # insns do not need to be adjacent +;; +;; (set (reg:SI <data>) +;; (mem:SI (reg:DI <xxx>))) # <ptr> dies with this insn +;; +;; We transform this into: +;; +;; (parallel [(set (reg:DI <ptr>) +;; (unspec:SI [(symbol_ref:DI <extern_addr>) +;; (const_int <marker>)] +;; UNSPEC_PCREL_OPT_LD_ADDR)) +;; (set (reg:DI <data>) +;; (unspec:DI [(const_int 0)] +;; UNSPEC_PCREL_OPT_LD_DATA))]) +;; +;; ... 
+;; +;; (parallel [(set (reg:SI <data>) +;; (unspec:SI [(mem:SI (reg:DI <ptr>)) +;; (reg:DI <data>) +;; (const_int <marker>)] +;; UNSPEC_PCREL_OPT_LD_RELOC)) +;; (clobber (reg:DI <ptr>))]) +;; +;; The marker is an integer constant that links the load of the external +;; address to the load of the actual variable. +;; +;; In the first insn, we set both the address of the external variable, and +;; mark that the variable being loaded both are created in that insn, and are +;; consumed in the second insn. It doesn't matter what mode the register that +;; we will ultimately do the load into, so we use DImode. We just need to mark +;; that both registers may be set in the first insn, and will be used in the +;; second insn. +;; +;; Since we use UNSPEC's and link both the the register holding the external +;; address and the value being loaded, it should prevent other passes from +;; modifying it. +;; +;; If the register being loaded is the same as the base register, we use an +;; alternate form of the insns. +;; +;; (set (reg:DI <data_ptr>) +;; (unspec:DI [(symbol_ref:DI <extern_addr>) +;; (const_int <marker>)] +;; UNSPEC_PCREL_OPT_LD_SAME_REG)) +;; +;; ... 
+;; +;; (parallel [(set (reg:SI <data>) +;; (unspec:SI [(mem:SI (reg:DI <ptr>)) +;; (reg:DI <data>) +;; (const_int <marker>)] +;; UNSPEC_PCREL_OPT_LD_RELOC)) +;; (clobber (reg:DI <ptr>))]) + +(define_c_enum "unspec" + [UNSPEC_PCREL_OPT_LD_ADDR + UNSPEC_PCREL_OPT_LD_DATA + UNSPEC_PCREL_OPT_LD_SAME_REG + UNSPEC_PCREL_OPT_LD_RELOC + UNSPEC_PCREL_OPT_ST_ADDR + UNSPEC_PCREL_OPT_ST_RELOC]) + +;; Modes that are supported for PCREL_OPT +(define_mode_iterator PCRELOPT [QI HI SI DI TI SF DF KF + V1TI V2DI V4SI V8HI V16QI V2DF V4SF + (TF "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD")]) + +;; Vector modes for PCREL_OPT +(define_mode_iterator PCRELOPT_VECT [TI KF V1TI V2DI V4SI V8HI V16QI V2DF V4SF + (TF "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD")]) + +;; Insn for loading the external address, where the register being loaded is not +;; the same as the register being loaded with the data. +(define_insn "pcrel_opt_ld_addr" + [(set (match_operand:DI 0 "base_reg_operand" "=&b,&b") + (unspec:DI [(match_operand:DI 1 "pcrel_external_address") + (match_operand 2 "const_int_operand" "n,n")] + UNSPEC_PCREL_OPT_LD_ADDR)) + (set (match_operand:DI 3 "gpc_reg_operand" "=r,wa") + (unspec:DI [(const_int 0)] + UNSPEC_PCREL_OPT_LD_DATA))] + "TARGET_PCREL_OPT + && reg_or_subregno (operands[0]) != reg_or_subregno (operands[3])" + "ld %0,%a1\n.Lpcrel%2:" + [(set_attr "prefixed" "yes") + (set_attr "type" "load") + (set_attr "loads_external_address" "yes")]) + +;; Alternate form of loading up the external address that is the same register +;; as the final load. 
+(define_insn "pcrel_opt_ld_addr_same_reg" + [(set (match_operand:DI 0 "base_reg_operand" "=b") + (unspec:DI [(match_operand:DI 1 "pcrel_external_address") + (match_operand 2 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_LD_SAME_REG))] + "TARGET_PCREL_OPT" + "ld %0,%a1\n.Lpcrel%2:" + [(set_attr "prefixed" "yes") + (set_attr "type" "load") + (set_attr "loads_external_address" "yes")]) + +;; PCREL_OPT modes that are optimized for loading or storing GPRs. +(define_mode_iterator PCRELOPT_GPR [QI HI SI DI SF DF]) + +(define_mode_attr PCRELOPT_GPR_LD [(QI "lbz") + (HI "lhz") + (SI "lwz") + (SF "lwz") + (DI "ld") + (DF "ld")]) + +;; PCREL_OPT load operation of GPRs. Operand 4 (the register used to hold the +;; address of the external symbol) is SCRATCH if the same register is used for +;; the normal load. +(define_insn "*pcrel_opt_ld<mode>_gpr" + [(parallel [(set (match_operand:PCRELOPT_GPR 0 "int_reg_operand" "+r") + (unspec:PCRELOPT_GPR [ + (match_operand:PCRELOPT_GPR 1 "d_form_memory" "m") + (match_operand:DI 2 "int_reg_operand" "0") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_LD_RELOC)) + (clobber (match_scratch:DI 4 "=bX"))])] + "TARGET_PCREL_OPT + && (GET_CODE (operands[4]) == SCRATCH + || reg_mentioned_p (operands[4], operands[1]))" +{ + output_pcrel_opt_reloc (operands[3]); + return "<PCRELOPT_GPR_LD> %0,%1"; +} + [(set_attr "type" "load")]) + +;; PCREL_OPT load with sign/zero extension +(define_insn "*pcrel_opt_ldsi_<u><mode>_gpr" + [(set (match_operand:EXTSI 0 "int_reg_operand" "+r") + (any_extend:EXTSI + (unspec:SI [(match_operand:SI 1 "d_form_memory" "m") + (match_operand:DI 2 "int_reg_operand" "0") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_LD_RELOC))) + (clobber (match_scratch:DI 4 "=bX"))] + "TARGET_PCREL_OPT" +{ + output_pcrel_opt_reloc (operands[3]); + return "lw<az> %0,%1"; +} + [(set_attr "type" "load")]) + +(define_insn "*pcrel_opt_ldhi_<u><mode>_gpr" + [(set (match_operand:EXTHI 0 "int_reg_operand" "+r") + 
(any_extend:EXTHI + (unspec:HI [(match_operand:HI 1 "d_form_memory" "m") + (match_operand:DI 2 "int_reg_operand" "0") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_LD_RELOC))) + (clobber (match_scratch:DI 4 "=bX"))] + "TARGET_PCREL_OPT" +{ + output_pcrel_opt_reloc (operands[3]); + return "lh<az> %0,%1"; +} + [(set_attr "type" "load")]) + +(define_insn "*pcrel_opt_ldqi_u<mode>_gpr" + [(set (match_operand:EXTQI 0 "int_reg_operand" "+r") + (zero_extend:EXTQI + (unspec:QI [(match_operand:QI 1 "d_form_memory" "m") + (match_operand:DI 2 "int_reg_operand" "0") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_LD_RELOC))) + (clobber (match_scratch:DI 4 "=bX"))] + "TARGET_PCREL_OPT" +{ + output_pcrel_opt_reloc (operands[3]); + return "lbz %0,%1"; +} + [(set_attr "type" "load")]) + +;; Scalar types that can be optimized by loading them into floating point +;; or Altivec registers. +(define_mode_iterator PCRELOPT_FP [DI DF SF]) + +;; Load instructions to load up scalar floating point or 64-bit integer values +;; into floating point registers or Altivec registers. +(define_mode_attr PCRELOPT_FPR_LD [(DI "lfd") (DF "lfd") (SF "lfs")]) +(define_mode_attr PCRELOPT_VMX_LD [(DI "lxsd") (DF "lxsd") (SF "lxssp")]) + +;; PCREL_OPT load operation of scalar DF/DI/SF into vector registers. +(define_insn "*pcrel_opt_ld<mode>_vsx" + [(set (match_operand:PCRELOPT_FP 0 "vsx_register_operand" "+d,v") + (unspec:PCRELOPT_FP [(match_operand:PCRELOPT_FP 1 "d_form_memory" "m,m") + (match_operand:DI 2 "vsx_register_operand" "0,0") + (match_operand 3 "const_int_operand" "n,n")] + UNSPEC_PCREL_OPT_LD_RELOC)) + (clobber (match_operand:DI 4 "base_reg_operand" "=b,b"))] + "TARGET_PCREL_OPT" +{ + output_pcrel_opt_reloc (operands[3]); + return which_alternative ? "<PCRELOPT_VMX_LD> %0,%1" + : "<PCRELOPT_FPR_LD> %0,%1"; +} + [(set_attr "type" "fpload")]) + +;; PCREL_OPT optimization extending SFmode to DFmode via a load. 
+(define_insn "*pcrel_opt_ldsf_df" + [(set (match_operand:DF 0 "vsx_register_operand" "+d,v") + (float_extend:DF + (unspec:SF [(match_operand:SF 1 "d_form_memory" "m,m") + (match_operand:DI 2 "vsx_register_operand" "0,0") + (match_operand 3 "const_int_operand" "n,n")] + UNSPEC_PCREL_OPT_LD_RELOC))) + (clobber (match_operand:DI 4 "base_reg_operand" "=b,b"))] + "TARGET_PCREL_OPT" +{ + output_pcrel_opt_reloc (operands[3]); + return which_alternative ? "lxssp %0,%1" : "lfs %0,%1"; +} + [(set_attr "type" "fpload")]) + +;; PCREL_OPT load operation of vector/float128 types into vector registers. +(define_insn "*pcrel_opt_ld<mode>" + [(set (match_operand:PCRELOPT_VECT 0 "vsx_register_operand" "+wa") + (unspec:PCRELOPT_VECT [(match_operand:PCRELOPT_VECT 1 "d_form_memory" "m") + (match_operand:DI 2 "vsx_register_operand" "0") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_LD_RELOC)) + (clobber (match_operand:DI 4 "base_reg_operand" "=b"))] + "TARGET_PCREL_OPT" +{ + output_pcrel_opt_reloc (operands[3]); + return "lxv %x0,%1"; +} + [(set_attr "type" "vecload")]) + + +;; PCREL_OPT optimization for stores. We need to put the label after the PLD +;; instruction, because the assembler might insert a NOP before the PLD for +;; alignment. +;; +;; If we are optimizing a single write, normally the code would look like: +;; +;; (set (reg:DI <ptr>) +;; (symbol_ref:DI "<extern_addr>")) # <data> must be live here +;; +;; ... # insns do not need to be adjacent +;; +;; (set (mem:SI (reg:DI <xxx>)) +;; (reg:SI <data>)) # <ptr> dies with this insn +;; +;; We optimize this to be: +;; +;; (parallel [(set (reg:DI <ptr>) +;; (unspec:DI [(symbol_ref:DI "<extern_addr>") +;; (const_int <marker>)] +;; UNSPEC_PCREL_OPT_ST_ADDR)) +;; (use (reg:<MODE> <data>))]) +;; +;; ... 
# insns do not need to be adjacent +;; +;; (parallel [(set (mem:<MODE> (reg:DI <ptr>)) +;; (unspec:<MODE> [(reg:<MODE> <data>) +;; (const_int <marker>)] +;; UNSPEC_PCREL_OPT_ST_RELOC)) +;; (clobber (reg:DI <ptr>))]) + +(define_insn "*pcrel_opt_st_addr<mode>" + [(set (match_operand:DI 0 "gpc_reg_operand" "=b") + (unspec:DI [(match_operand:DI 1 "pcrel_external_address") + (match_operand 2 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_ST_ADDR)) + (use (match_operand:PCRELOPT 3 "gpc_reg_operand" "rwa"))] + "TARGET_PCREL_OPT" + "ld %0,%a1\n.Lpcrel%2:" + [(set_attr "prefixed" "yes") + (set_attr "type" "load") + (set_attr "loads_external_address" "yes")]) + +;; PCREL_OPT stores. +(define_insn "*pcrel_opt_st<mode>" + [(set (match_operand:QHSI 0 "d_form_memory" "=m") + (unspec:QHSI [(match_operand:QHSI 1 "gpc_reg_operand" "r") + (match_operand 2 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b"))] + "TARGET_PCREL_OPT" +{ + output_pcrel_opt_reloc (operands[2]); + return "st<wd> %1,%0"; +} + [(set_attr "type" "store")]) + +(define_insn "*pcrel_opt_stdi" + [(set (match_operand:DI 0 "d_form_memory" "=m,m,m") + (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r,d,v") + (match_operand 2 "const_int_operand" "n,n,n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))] + "TARGET_PCREL_OPT && TARGET_POWERPC64" +{ + output_pcrel_opt_reloc (operands[2]); + switch (which_alternative) + { + case 0: + return "std %1,%0"; + case 1: + return "stfd %1,%0"; + case 2: + return "stxsd %1,%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "store,fpstore,fpstore")]) + +(define_insn "*pcrel_opt_stsf" + [(set (match_operand:SF 0 "d_form_memory" "=m,m,m") + (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "d,v,r") + (match_operand 2 "const_int_operand" "n,n,n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))] + "TARGET_PCREL_OPT" +{ + 
output_pcrel_opt_reloc (operands[2]); + switch (which_alternative) + { + case 0: + return "stfs %1,%0"; + case 1: + return "stxssp %1,%0"; + case 2: + return "stw %1,%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fpstore,fpstore,store")]) + +(define_insn "*pcrel_opt_stdf" + [(set (match_operand:DF 0 "d_form_memory" "=m,m,m") + (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d,v,r") + (match_operand 2 "const_int_operand" "n,n,n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))] + "TARGET_PCREL_OPT + && (TARGET_POWERPC64 || vsx_register_operand (operands[1], DFmode))" +{ + output_pcrel_opt_reloc (operands[2]); + switch (which_alternative) + { + case 0: + return "stfd %1,%0"; + case 1: + return "stxsd %1,%0"; + case 2: + return "std %1,%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fpstore,fpstore,store")]) + +(define_insn "*pcrel_opt_st<mode>" + [(set (match_operand:PCRELOPT_VECT 0 "d_form_memory" "=m") + (unspec:PCRELOPT_VECT [(match_operand:PCRELOPT_VECT 1 "gpc_reg_operand" "wa") + (match_operand 2 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b"))] + "TARGET_PCREL_OPT" +{ + output_pcrel_opt_reloc (operands[2]); + return "stxv %x1,%0"; +} + [(set_attr "type" "vecstore")]) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index bd26c62..69f3c70 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -992,6 +992,20 @@ return INTVAL (offset) % 4 == 0; }) +;; Return 1 if the operand is a memory operand that has a valid address for +;; a DS-form instruction. I.e. the address has to be either just a register, +;; or register + const where the two low order bits of const are zero. 
+(define_predicate "ds_form_mem_operand" + (match_code "subreg,mem") +{ + if (!any_memory_operand (op, mode)) + return false; + + rtx addr = XEXP (op, 0); + + return address_to_insn_form (addr, mode, NON_PREFIXED_DS) == INSN_FORM_DS; +}) + ;; Return 1 if the operand, used inside a MEM, is a SYMBOL_REF. (define_predicate "symbol_ref_operand" (and (match_code "symbol_ref") @@ -1904,3 +1918,24 @@ { return address_is_prefixed (XEXP (op, 0), mode, NON_PREFIXED_DEFAULT); }) + +;; Return true if the operand is a valid memory operand with a D-form +;; address that could be merged with the load of a PC-relative external address +;; with the PCREL_OPT optimization. We don't check here whether or not the +;; offset needs to be used in a DS-FORM (bottom 2 bits 0) or DQ-FORM (bottom 4 +;; bits 0) instruction. +(define_predicate "d_form_memory" + (match_code "mem") +{ + if (!memory_operand (op, mode)) + return false; + + rtx addr = XEXP (op, 0); + + if (REG_P (addr)) + return true; + if (SUBREG_P (addr) && REG_P (SUBREG_REG (addr))) + return true; + + return !indexed_address (addr, mode); +}) diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 058a32a..609bebd 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -43,6 +43,10 @@ ATTR builtin attribute information. ICODE Insn code of the function that implements the builtin. */ +#ifndef RS6000_BUILTIN_COMPAT + #undef BU_COMPAT + #define BU_COMPAT(ENUM, COMPAT_NAME) + #ifndef RS6000_BUILTIN_0 #error "RS6000_BUILTIN_0 is not defined." #endif @@ -87,6 +91,36 @@ #error "RS6000_BUILTIN_X is not defined." #endif +#else + /* Compatibility builtins. These builtins are simply mapped into + their compatible builtin function identified by ENUM. 
*/ + #undef BU_COMPAT + #define BU_COMPAT(ENUM, COMPAT_NAME) { ENUM, "__builtin_" COMPAT_NAME }, + + #undef RS6000_BUILTIN_0 + #undef RS6000_BUILTIN_1 + #undef RS6000_BUILTIN_2 + #undef RS6000_BUILTIN_3 + #undef RS6000_BUILTIN_4 + #undef RS6000_BUILTIN_A + #undef RS6000_BUILTIN_D + #undef RS6000_BUILTIN_H + #undef RS6000_BUILTIN_M + #undef RS6000_BUILTIN_P + #undef RS6000_BUILTIN_X + #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) + #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) + #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) + #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) + #define RS6000_BUILTIN_4(ENUM, NAME, MASK, ATTR, ICODE) + #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) + #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) + #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) + #define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE) + #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) + #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) +#endif + #ifndef BU_AV_1 /* Define convenience macros using token pasting to allow fitting everything in one line. */ @@ -368,6 +402,23 @@ | RS6000_BTC_BINARY), \ CODE_FOR_ ## ICODE) /* ICODE */ +/* Like BU_MMA_2, but uses "vsx" rather than "mma" naming. 
*/ +#define BU_MMA_V2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_M (VSX_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_MMA, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY \ + | RS6000_BTC_VOID \ + | RS6000_BTC_GIMPLE), \ + CODE_FOR_nothing) /* ICODE */ \ + RS6000_BUILTIN_M (VSX_BUILTIN_ ## ENUM ## _INTERNAL, /* ENUM */ \ + "__builtin_vsx_" NAME "_internal", /* NAME */ \ + RS6000_BTM_MMA, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + #define BU_MMA_3(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_mma_" NAME, /* NAME */ \ @@ -384,6 +435,23 @@ | RS6000_BTC_TERNARY), \ CODE_FOR_ ## ICODE) /* ICODE */ +/* Like BU_MMA_3, but uses "vsx" rather than "mma" naming. */ +#define BU_MMA_V3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_M (VSX_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_MMA, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY \ + | RS6000_BTC_VOID \ + | RS6000_BTC_GIMPLE), \ + CODE_FOR_nothing) /* ICODE */ \ + RS6000_BUILTIN_M (VSX_BUILTIN_ ## ENUM ## _INTERNAL, /* ENUM */ \ + "__builtin_vsx_" NAME "_internal", /* NAME */ \ + RS6000_BTM_MMA, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + #define BU_MMA_5(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_mma_" NAME, /* NAME */ \ @@ -3136,9 +3204,11 @@ BU_MMA_1 (XXMTACC, "xxmtacc", QUAD, mma_xxmtacc) BU_MMA_1 (XXSETACCZ, "xxsetaccz", MISC, mma_xxsetaccz) BU_MMA_2 (DISASSEMBLE_ACC, "disassemble_acc", QUAD, mma_disassemble_acc) -BU_MMA_2 (DISASSEMBLE_PAIR,"disassemble_pair", PAIR, mma_disassemble_pair) +BU_MMA_V2 (DISASSEMBLE_PAIR, "disassemble_pair", PAIR, vsx_disassemble_pair) +BU_COMPAT (VSX_BUILTIN_DISASSEMBLE_PAIR, "mma_disassemble_pair") -BU_MMA_3 (ASSEMBLE_PAIR, "assemble_pair", MISC, 
mma_assemble_pair) +BU_MMA_V3 (ASSEMBLE_PAIR, "assemble_pair", MISC, vsx_assemble_pair) +BU_COMPAT (VSX_BUILTIN_ASSEMBLE_PAIR, "mma_assemble_pair") BU_MMA_3 (XVBF16GER2, "xvbf16ger2", MISC, mma_xvbf16ger2) BU_MMA_3 (XVF16GER2, "xvf16ger2", MISC, mma_xvf16ger2) BU_MMA_3 (XVF32GER, "xvf32ger", MISC, mma_xvf32ger) diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index de0ce50..f567625 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -89,6 +89,12 @@ #define TARGET_NO_PROTOTYPE 0 #endif +struct builtin_compatibility +{ + const enum rs6000_builtins code; + const char *const name; +}; + struct builtin_description { const HOST_WIDE_INT mask; @@ -8839,6 +8845,13 @@ def_builtin (const char *name, tree type, enum rs6000_builtins code) (int)code, name, attr_string); } +static const struct builtin_compatibility bdesc_compat[] = +{ +#define RS6000_BUILTIN_COMPAT +#include "rs6000-builtin.def" +}; +#undef RS6000_BUILTIN_COMPAT + /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */ #undef RS6000_BUILTIN_0 @@ -10115,7 +10128,7 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp) unsigned attr_args = attr & RS6000_BTC_OPND_MASK; if (attr & RS6000_BTC_QUAD - || fcode == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL) + || fcode == VSX_BUILTIN_DISASSEMBLE_PAIR_INTERNAL) attr_args++; gcc_assert (nopnds == attr_args); @@ -11730,7 +11743,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi) tree new_decl; if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC - || fncode == MMA_BUILTIN_DISASSEMBLE_PAIR) + || fncode == VSX_BUILTIN_DISASSEMBLE_PAIR) { /* This is an MMA disassemble built-in function. */ push_gimplify_context (true); @@ -11745,7 +11758,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi) another accumulator/pair, then just copy the entire thing as is. 
*/ if ((fncode == MMA_BUILTIN_DISASSEMBLE_ACC && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node) - || (fncode == MMA_BUILTIN_DISASSEMBLE_PAIR + || (fncode == VSX_BUILTIN_DISASSEMBLE_PAIR && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_pair_type_node)) { tree dst = build_simple_mem_ref (build1 (VIEW_CONVERT_EXPR, @@ -11847,7 +11860,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi) gcc_unreachable (); } - if (fncode == MMA_BUILTIN_ASSEMBLE_PAIR) + if (fncode == VSX_BUILTIN_ASSEMBLE_PAIR) lhs = make_ssa_name (vector_pair_type_node); else lhs = make_ssa_name (vector_quad_type_node); @@ -13447,6 +13460,18 @@ rs6000_init_builtins (void) #ifdef SUBTARGET_INIT_BUILTINS SUBTARGET_INIT_BUILTINS; #endif + + /* Register the compatibility builtins after all of the normal + builtins have been defined. */ + const struct builtin_compatibility *d = bdesc_compat; + unsigned i; + for (i = 0; i < ARRAY_SIZE (bdesc_compat); i++, d++) + { + tree decl = rs6000_builtin_decls[(int)d->code]; + if (decl != NULL) + add_builtin_function (d->name, TREE_TYPE (decl), (int)d->code, + BUILT_IN_MD, NULL, NULL_TREE); + } } /* Returns the rs6000 builtin decl for CODE. */ @@ -14119,7 +14144,7 @@ mma_init_builtins (void) else { if (!(d->code == MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL - || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL) + || d->code == VSX_BUILTIN_DISASSEMBLE_PAIR_INTERNAL) && (attr & RS6000_BTC_QUAD) == 0) attr_args--; @@ -14129,7 +14154,7 @@ mma_init_builtins (void) /* This is a disassemble pair/acc function. 
*/ if (d->code == MMA_BUILTIN_DISASSEMBLE_ACC - || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR) + || d->code == VSX_BUILTIN_DISASSEMBLE_PAIR) { op[nopnds++] = build_pointer_type (void_type_node); if (d->code == MMA_BUILTIN_DISASSEMBLE_ACC) @@ -14143,7 +14168,7 @@ mma_init_builtins (void) unsigned j = 0; if (attr & RS6000_BTC_QUAD && d->code != MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL - && d->code != MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL) + && d->code != VSX_BUILTIN_DISASSEMBLE_PAIR_INTERNAL) j = 1; for (; j < (unsigned) insn_data[icode].n_operands; j++) { @@ -14151,7 +14176,7 @@ mma_init_builtins (void) if (gimple_func && mode == XOmode) op[nopnds++] = build_pointer_type (vector_quad_type_node); else if (gimple_func && mode == OOmode - && d->code == MMA_BUILTIN_ASSEMBLE_PAIR) + && d->code == VSX_BUILTIN_ASSEMBLE_PAIR) op[nopnds++] = build_pointer_type (vector_pair_type_node); else /* MMA uses unsigned types. */ diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index f0cf79e..cbbb42c 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -77,6 +77,7 @@ /* Flags that need to be turned off if -mno-power10. */ #define OTHER_POWER10_MASKS (OPTION_MASK_MMA \ | OPTION_MASK_PCREL \ + | OPTION_MASK_PCREL_OPT \ | OPTION_MASK_PREFIXED) #define ISA_3_1_MASKS_SERVER (ISA_3_0_MASKS_SERVER \ @@ -147,6 +148,7 @@ | OPTION_MASK_P9_MISC \ | OPTION_MASK_P9_VECTOR \ | OPTION_MASK_PCREL \ + | OPTION_MASK_PCREL_OPT \ | OPTION_MASK_POPCNTB \ | OPTION_MASK_POPCNTD \ | OPTION_MASK_POWERPC64 \ diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def index 606ad3e..c8e46ba 100644 --- a/gcc/config/rs6000/rs6000-passes.def +++ b/gcc/config/rs6000/rs6000-passes.def @@ -24,4 +24,12 @@ along with GCC; see the file COPYING3. If not see REPLACE_PASS (PASS, INSTANCE, TGT_PASS) */ + /* Pass to add the appropriate vector swaps on power8 little endian systems. 
+ The power8 does not have instructions that automatically do the byte swaps + for loads and stores. */ INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps); + + /* Pass to do the PCREL_OPT optimization that combines the load of an + external symbol's address along with a single load or store using that + address as a base register. */ + INSERT_PASS_BEFORE (pass_sched2, 1, pass_pcrel_opt); diff --git a/gcc/config/rs6000/rs6000-pcrel-opt.c b/gcc/config/rs6000/rs6000-pcrel-opt.c new file mode 100644 index 0000000..32275aa --- /dev/null +++ b/gcc/config/rs6000/rs6000-pcrel-opt.c @@ -0,0 +1,910 @@ +/* Subroutines used to support the pc-relative linker optimization. + Copyright (C) 2020-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* This file implements a RTL pass that looks for pc-relative loads of the + address of an external variable using the PCREL_GOT relocation and a single + load that uses that external address. 
If that is found we create the + PCREL_OPT relocation to possibly convert: + + pld addr_reg,var@pcrel@got + + <possibly other insns that do not use 'addr_reg' or 'data_reg'> + + lwz data_reg,0(addr_reg) + + into: + + plwz data_reg,var@pcrel + + <possibly other insns that do not use 'addr_reg' or 'data_reg'> + + nop + + Of course it would be nice to be able to put the plwz in this example in + place of the lwz but the linker cannot easily replace a 4-byte instruction + with an 8-byte one. + + If the variable is not defined in the main program or the code using it is + not in the main program, the linker puts the address in the .got section and + generates: + + .section .got + .Lvar_got: + .dword var + + At the point where it is referenced, we have: + + .section .text + pld addr_reg,.Lvar_got@pcrel + + <possibly other insns that do not use 'addr_reg' or 'data_reg'> + + lwz data_reg,0(addr_reg) + + We look for a single usage in the basic block where this external + address is loaded, and convert it to a PCREL_OPT relocation so the + linker can convert it to a single plwz in this case. Multiple uses + or references in another basic block will force us to not use the + PCREL_OPT relocation. + + We also optimize stores to the address of an external variable using the + PCREL_GOT relocation and a single store that uses that external address. 
If + that is found we create the PCREL_OPT relocation to possibly convert: + + pld addr_reg,var@pcrel@got + + <possibly other insns that do not use 'addr_reg' or 'data_reg'> + + stw data_reg,0(addr_reg) + + into: + + pstw data_reg,var@pcrel + + <possibly other insns that do not use 'addr_reg' or 'data_reg'> + + nop + + If the variable is not defined in the main program or the code using it is + not in the main program, the linker puts the address in the .got section and + generates: + + .section .got + .Lvar_got: + .dword var + + And at our point of reference we have: + + .section .text + pld addr_reg,.Lvar_got@pcrel + + <possibly other insns that do not use 'addr_reg' or 'data_reg'> + + stw data_reg,0(addr_reg) + + We only look for a single usage in the basic block where the external + address is loaded. Multiple uses or references in another basic block will + force us to not use the PCREL_OPT relocation. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "rtl.h" +#include "tree.h" +#include "memmodel.h" +#include "expmed.h" +#include "optabs.h" +#include "recog.h" +#include "df.h" +#include "tm_p.h" +#include "ira.h" +#include "print-tree.h" +#include "varasm.h" +#include "explow.h" +#include "expr.h" +#include "output.h" +#include "tree-pass.h" +#include "rtx-vector-builder.h" +#include "print-rtl.h" +#include "insn-attr.h" +#include "insn-codes.h" + +/* Various counters. */ +static struct { + unsigned long extern_addrs; + unsigned long loads; + unsigned long adjacent_loads; + unsigned long failed_loads; + unsigned long stores; + unsigned long adjacent_stores; + unsigned long failed_stores; +} counters; + +/* Unique integer that is appended to .Lpcrel to make a pcrel_opt label. */ +static unsigned int pcrel_opt_next_num; + + +/* Optimize a PC-relative load address to be used in a load. 
Before it calls + this function, pcrel_opt_address () uses DF to make sure that it is safe + to do the PCREL_OPT optimization on these insns. + + Convert insns of the form: + + (set (reg:DI addr) + (symbol_ref:DI "ext_symbol")) + + ... + + (set (reg:<MODE> value) + (mem:<MODE> (reg:DI addr))) + + into: + + (parallel [(set (reg:DI addr) + (unspec:<MODE> [(symbol_ref:DI "ext_symbol") + (const_int label_num)] + UNSPEC_PCREL_OPT_LD_ADDR)) + (set (reg:DI data) + (unspec:DI [(const_int 0)] + UNSPEC_PCREL_OPT_LD_DATA))]) + + ... + + (parallel [(set (reg:<MODE>) + (unspec:<MODE> [(mem:<MODE> (reg:DI addr)) + (reg:DI data) + (const_int label_num)] + UNSPEC_PCREL_OPT_LD_RELOC)) + (clobber (reg:DI addr))]) + + Because PCREL_OPT will move the actual location of the load from the second + insn to the first, we need to have the register for the load data be live + starting at the first insn. + + If the destination register for the data being loaded is the same register + used to hold the extern address, we generate this insn instead: + + (set (reg:DI data) + (unspec:DI [(symbol_ref:DI "ext_symbol") + (const_int label_num)] + UNSPEC_PCREL_OPT_LD_SAME_REG)) + + In the first insn, we set both the address of the external variable, and mark + that the variable being loaded both are created in that insn, and are + consumed in the second insn. The mode used in the first insn for the data + register that will be loaded in the second insn doesn't matter in the end so + we use DImode. We just need to mark that both registers may be set in the + first insn, and will be used in the second insn. + + The UNSPEC_PCREL_OPT_LD_ADDR insn will generate the load address plus + a definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_LD_RELOC + insn will generate the .reloc to tell the linker to tie the load address and + load using that address together. + + pld b,ext_symbol@got@pcrel + .Lpcrel1: + + ... 
+ + .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) + lwz r,0(b) + + If ext_symbol is defined in another object file in the main program and we + are linking the main program, the linker will convert the above instructions + to: + + plwz r,ext_symbol@got@pcrel + + ... + + nop + + ADDR_INSN is the insn that is loading the address. + LOAD_INSN is the insn that uses the address to load the actual data. */ + +static void +pcrel_opt_load (rtx_insn *addr_insn, rtx_insn *load_insn) +{ + rtx addr_set = PATTERN (addr_insn); + gcc_assert (GET_CODE (addr_set) == SET); + + rtx addr_reg = SET_DEST (addr_set); + gcc_assert (base_reg_operand (addr_reg, Pmode)); + + rtx addr_symbol = SET_SRC (addr_set); + gcc_assert (pcrel_external_address (addr_symbol, Pmode)); + + rtx load_set = PATTERN (load_insn); + gcc_assert (GET_CODE (load_set) == SET); + + /* Make sure there are no references to the register being loaded + between the two insns. */ + rtx reg = SET_DEST (load_set); + if (reg_used_between_p (reg, addr_insn, load_insn) + || reg_set_between_p (reg, addr_insn, load_insn)) + return; + + rtx mem = SET_SRC (load_set); + machine_mode reg_mode = GET_MODE (reg); + machine_mode mem_mode = GET_MODE (mem); + rtx mem_inner = mem; + unsigned int reg_regno = reg_or_subregno (reg); + + /* Handle the fact that LWA is a DS format instruction, but LWZ is a D format + instruction. If the mem load is a signed SImode (i.e. LWA would be used) + we set mem_mode to DImode so that pcrel_opt_valid_mem_p() will check that + the address will work for a DS-form instruction. If it won't work, we skip + the optimization. The float loads are all indexed so there are no problems + there. 
*/ + + if (GET_CODE (mem) == SIGN_EXTEND && GET_MODE (XEXP (mem, 0)) == SImode) + { + if (!INT_REGNO_P (reg_regno)) + return; + + mem_inner = XEXP (mem, 0); + mem_mode = DImode; + } + + else if (GET_CODE (mem) == SIGN_EXTEND + || GET_CODE (mem) == ZERO_EXTEND + || GET_CODE (mem) == FLOAT_EXTEND) + { + mem_inner = XEXP (mem, 0); + mem_mode = GET_MODE (mem_inner); + } + + if (!MEM_P (mem_inner)) + return; + + /* Can we do PCREL_OPT for this reference? */ + if (!pcrel_opt_valid_mem_p (reg, mem_mode, mem_inner)) + return; + + /* Allocate a new PC-relative label, and update the load external address + insn. + + If the register being loaded is different from the address register, we + need to indicate both registers are set at the load of the address. + + (parallel [(set (reg load) + (unspec [(symbol_ref addr_symbol) + (const_int label_num)] + UNSPEC_PCREL_OPT_LD_ADDR)) + (set (reg addr) + (unspec [(const_int 0)] + UNSPEC_PCREL_OPT_LD_DATA))]) + + If the register being loaded is the same as the address register, we use + an alternate form: + + (set (reg load) + (unspec [(symbol_ref addr_symbol) + (const_int label_num)] + UNSPEC_PCREL_OPT_LD_SAME_REG)) */ + unsigned int addr_regno = reg_or_subregno (addr_reg); + rtx label_num = GEN_INT (++pcrel_opt_next_num); + rtx reg_di = gen_rtx_REG (DImode, reg_regno); + rtx addr_pattern; + + /* Create the load address, either using the pattern with an explicit clobber + if the address register is not the same as the register being loaded, or + using the pattern that requires the address register to be the address + loaded. */ + if (addr_regno != reg_regno) + addr_pattern = gen_pcrel_opt_ld_addr (addr_reg, addr_symbol, label_num, + reg_di); + else + addr_pattern = gen_pcrel_opt_ld_addr_same_reg (addr_reg, addr_symbol, + label_num); + + validate_change (addr_insn, &PATTERN (addr_insn), addr_pattern, false); + + /* Update the load insn. If the mem had a sign/zero/float extend, add that + also after doing the UNSPEC. 
Add an explicit clobber of the external + address register just to make it clear that the address register dies. + + (parallel [(set (reg:<MODE> data) + (unspec:<MODE> [(mem (addr_reg) + (reg:DI data) + (const_int label_num)] + UNSPEC_PCREL_OPT_LD_RELOC)) + (clobber (reg:DI addr_reg))]) */ + rtvec v_load = gen_rtvec (3, mem_inner, reg_di, label_num); + rtx new_load = gen_rtx_UNSPEC (GET_MODE (mem_inner), v_load, + UNSPEC_PCREL_OPT_LD_RELOC); + + if (GET_CODE (mem) != GET_CODE (mem_inner)) + new_load = gen_rtx_fmt_e (GET_CODE (mem), reg_mode, new_load); + + rtx new_load_set = gen_rtx_SET (reg, new_load); + rtx load_clobber = gen_rtx_CLOBBER (VOIDmode, + (addr_regno == reg_regno + ? gen_rtx_SCRATCH (Pmode) + : addr_reg)); + rtx new_load_pattern + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_load_set, load_clobber)); + + validate_change (load_insn, &PATTERN (load_insn), new_load_pattern, false); + + /* Attempt to apply the changes: */ + if (!apply_change_group ()) + { + /* PCREL_OPT load optimization did not succeed. */ + counters.failed_loads++; + if (dump_file) + fprintf (dump_file, + "PCREL_OPT load failed (addr insn = %d, use insn = %d).\n", + INSN_UID (addr_insn), + INSN_UID (load_insn)); + return; + } + + /* PCREL_OPT load optimization succeeded. */ + counters.loads++; + if (next_nonnote_insn (addr_insn) == load_insn) + counters.adjacent_loads++; + + if (dump_file) + fprintf (dump_file, + "PCREL_OPT load (addr insn = %d, use insn = %d).\n", + INSN_UID (addr_insn), + INSN_UID (load_insn)); + + /* Because we have set DF_DEFER_INSN_RESCAN, we have to explicitly do it + after we have made changes to the insns. */ + df_analyze (); + +} + +/* Optimize a PC-relative load address to be used in a store. Before calling + this function, pcrel_opt_address () uses DF to make sure it is safe to do + the PCREL_OPT optimization. + + Convert insns of the form: + + (set (reg:DI addr) + (symbol_ref:DI "ext_symbol")) + + ... 
+ + (set (mem:<MODE> (reg:DI addr)) + (reg:<MODE> value)) + + into: + + (parallel [(set (reg:DI addr) + (unspec:DI [(symbol_ref:DI "ext_symbol") + (const_int label_num)] + UNSPEC_PCREL_OPT_ST_ADDR)) + (use (reg:<MODE> value))]) + + ... + + (parallel [(set (mem:<MODE> (reg:DI addr)) + (unspec:<MODE> [(reg:<MODE>) + (const_int label_num)] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (reg:DI addr))]) + + The UNSPEC_PCREL_OPT_ST_ADDR insn will generate the load address plus a + definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_ST_RELOC insn + will generate the .reloc to tell the linker to tie the load address and store + using that address together. + + pld b,ext_symbol@got@pcrel + .Lpcrel1: + + ... + + .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) + stw r,0(b) + + If ext_symbol is defined in another object file in the main program and we + are linking the main program, the linker will convert the above instructions + to: + + pstw r,ext_symbol@got@pcrel + + ... + + nop */ + +static void +pcrel_opt_store (rtx_insn *addr_insn, /* insn loading address. */ + rtx_insn *store_insn) /* insn using address. */ +{ + rtx addr_old_set = PATTERN (addr_insn); + gcc_assert (GET_CODE (addr_old_set) == SET); + + rtx addr_reg = SET_DEST (addr_old_set); + gcc_assert (base_reg_operand (addr_reg, Pmode)); + + rtx addr_symbol = SET_SRC (addr_old_set); + gcc_assert (pcrel_external_address (addr_symbol, Pmode)); + + rtx store_set = PATTERN (store_insn); + gcc_assert (GET_CODE (store_set) == SET); + + rtx mem = SET_DEST (store_set); + if (!MEM_P (mem)) + return; + + machine_mode mem_mode = GET_MODE (mem); + rtx reg = SET_SRC (store_set); + + /* Don't allow storing the address of the external variable. */ + if (reg_or_subregno (reg) == reg_or_subregno (addr_reg)) + return; + + /* Can we do PCREL_OPT for this reference? */ + if (!pcrel_opt_valid_mem_p (reg, mem_mode, mem)) + return; + + /* Allocate a new PC-relative label, and update the load address insn. 
+ + (parallel [(set (reg addr) + (unspec [(symbol_ref symbol) + (const_int label_num)] + UNSPEC_PCREL_OPT_ST_ADDR)) + (use (reg store))]) + */ + rtx label_num = GEN_INT (++pcrel_opt_next_num); + rtvec v_addr = gen_rtvec (2, addr_symbol, label_num); + rtx addr_unspec = gen_rtx_UNSPEC (Pmode, v_addr, + UNSPEC_PCREL_OPT_ST_ADDR); + rtx addr_new_set = gen_rtx_SET (addr_reg, addr_unspec); + rtx addr_use = gen_rtx_USE (VOIDmode, reg); + rtx addr_new_pattern + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, addr_new_set, addr_use)); + + validate_change (addr_insn, &PATTERN (addr_insn), addr_new_pattern, false); + + /* Update the store insn. Add an explicit clobber of the external address + register just to be sure there are no additional uses of the address + register. + + (parallel [(set (mem (addr_reg) + (unspec:<MODE> [(reg) + (const_int label_num)] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (reg:DI addr_reg))]) */ + rtvec v_store = gen_rtvec (2, reg, label_num); + rtx new_store = gen_rtx_UNSPEC (mem_mode, v_store, + UNSPEC_PCREL_OPT_ST_RELOC); + + rtx new_store_set = gen_rtx_SET (mem, new_store); + rtx store_clobber = gen_rtx_CLOBBER (VOIDmode, addr_reg); + rtx new_store_pattern + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_store_set, store_clobber)); + + validate_change (store_insn, &PATTERN (store_insn), new_store_pattern, false); + + /* Attempt to apply the changes: */ + if (!apply_change_group ()) + { + /* PCREL_OPT store failed. */ + counters.failed_stores++; + if (dump_file) + fprintf (dump_file, + "PCREL_OPT store failed (addr insn = %d, use insn = %d).\n", + INSN_UID (addr_insn), + INSN_UID (store_insn)); + return; + } + + /* PCREL_OPT store succeeded. 
+ counters.stores++; + if (next_nonnote_insn (addr_insn) == store_insn) + counters.adjacent_stores++; + + if (dump_file) + fprintf (dump_file, + "PCREL_OPT store (addr insn = %d, use insn = %d).\n", + INSN_UID (addr_insn), + INSN_UID (store_insn)); + + /* Because we have set DF_DEFER_INSN_RESCAN, we have to explicitly do it + after we have made changes to the insns. */ + df_analyze(); + +} + +/* Return the register used as the base register of MEM, if the instruction has + a pc-relative form. We look for BSWAP to rule out LFIWAX/LFIWZX/STFIWX, and + ROTATE/VEC_SELECT are RTX_EXTRA not RTX_UNARY which rules out lxvd2x. This + excludes instructions that do not have a pc-relative form. */ + +static rtx +get_mem_base_reg (rtx mem) +{ + const char * fmt; + + while (!MEM_P (mem)) + { + if (GET_RTX_CLASS (GET_CODE (mem)) != RTX_UNARY + || GET_CODE (mem) == BSWAP) + return NULL_RTX; + fmt = GET_RTX_FORMAT (GET_CODE (mem)); + if (fmt[0] != 'e') + return NULL_RTX; + mem = XEXP (mem, 0); + if (mem == NULL_RTX ) + return NULL_RTX; + } + + if (!MEM_SIZE_KNOWN_P (mem)) + return NULL_RTX; + + rtx addr_rtx = (XEXP (mem, 0)); + if (GET_CODE (addr_rtx) == PRE_MODIFY) + addr_rtx = XEXP (addr_rtx, 1); + + while (GET_CODE (addr_rtx) == PLUS + && CONST_INT_P (XEXP (addr_rtx, 1))) + addr_rtx = XEXP (addr_rtx, 0); + + if (!REG_P (addr_rtx)) + return NULL_RTX; + + return addr_rtx; +} + +/* Check whether INSN contains a reference to REGNO that will inhibit the + PCREL_OPT optimization. If TYPE is a load or store instruction, return true + if there is a definition of REGNO. If TYPE is a load instruction, then + return true if there is a use of REGNO. */ + +static bool +insn_references_regno_p (rtx_insn *insn, unsigned int regno, + enum attr_type type) +{ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref ref; + + /* Return true if there is a definition of REGNO. 
*/ + for (ref = DF_INSN_INFO_DEFS (insn_info); ref; ref = DF_REF_NEXT_LOC (ref)) + if (DF_REF_REGNO (ref) == regno) + return true; + + /* If type is a load, return true if there is a use of REGNO. */ + if (type == TYPE_LOAD + || type == TYPE_FPLOAD + || type == TYPE_VECLOAD) + for (ref = DF_INSN_INFO_USES (insn_info); ref; ref = DF_REF_NEXT_LOC (ref)) + if (DF_REF_REGNO (ref) == regno) + return true; + + return false; +} + +/* Given an insn that loads up a base register with the address of an + external symbol, see if we can optimize it with the PCREL_OPT + optimization. + + DF is used to make sure that there is exactly one definition and one + non-debug use of the address register defined by the insn. The use insn must + be a non-prefix insn, and must also be in the same basic block as the address + insn. + + ADDR_INSN is the insn that loads the external symbol address. */ + +static void +pcrel_opt_address (rtx_insn *addr_insn) +{ + counters.extern_addrs++; + + /* Do some basic validation. */ + rtx addr_set = PATTERN (addr_insn); + if (GET_CODE (addr_set) != SET) + return; + + rtx addr_reg = SET_DEST (addr_set); + rtx addr_symbol = SET_SRC (addr_set); + + if (!base_reg_operand (addr_reg, Pmode) + || !pcrel_external_address (addr_symbol, Pmode)) + return; + + /* The address register must have exactly one definition. */ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (addr_insn); + if (!insn_info) + return; + + df_ref def = df_single_def (insn_info); + if (!def) + return; + + /* Make sure there is at least one use. */ + df_link *chain = DF_REF_CHAIN (def); + if (!chain || !chain->ref) + return; + + /* Get the insn of the possible load or store. */ + rtx_insn *use_insn = DF_REF_INSN (chain->ref); + + /* Ensure there are no other uses. 
*/ + for (chain = chain->next; chain; chain = chain->next) + if (chain->ref && DF_REF_INSN_INFO (chain->ref)) + { + gcc_assert (DF_REF_INSN (chain->ref)); + if (NONDEBUG_INSN_P (DF_REF_INSN (chain->ref))) + return; + } + + /* The use instruction must be a single non-prefixed instruction. */ + if (get_attr_length (use_insn) != 4) + return; + + /* The address and the memory operation must be in the same basic block. */ + if (BLOCK_FOR_INSN (use_insn) != BLOCK_FOR_INSN (addr_insn)) + return; + + /* If this isn't a simple SET, skip doing the optimization. */ + if (GET_CODE (PATTERN (use_insn)) != SET) + return; + + enum attr_type use_insn_type = get_attr_type (use_insn); + unsigned int use_regno; + + /* Make sure the use_insn is using addr_reg as its base register + for the load or store, and determine the regno for the register + used in the use_insn. */ + rtx use_dest, use_src; + switch (use_insn_type) + { + case TYPE_LOAD: + case TYPE_FPLOAD: + case TYPE_VECLOAD: + /* Make sure our address register is the same register used in the + base address of the load. */ + if (addr_reg != get_mem_base_reg (SET_SRC (PATTERN (use_insn)))) + return; + /* Make sure we are setting a register before we look at REGNO. */ + use_dest = SET_DEST (PATTERN (use_insn)); + if (!register_operand (use_dest, GET_MODE (use_dest))) + return; + use_regno = REGNO (use_dest); + break; + case TYPE_STORE: + case TYPE_FPSTORE: + case TYPE_VECSTORE: + /* Make sure our address register is the same register used in the + base address of the store. */ + if (addr_reg != get_mem_base_reg (SET_DEST (PATTERN (use_insn)))) + return; + /* Make sure this is a register before we look at REGNO. */ + use_src = SET_SRC (PATTERN (use_insn)); + if (!register_operand (use_src, GET_MODE (use_src))) + return; + use_regno = REGNO (use_src); + break; + default: + /* We can only optimize loads and stores. Ignore everything else. 
*/ + return; + } + + rtx_insn *insn; + for (insn = NEXT_INSN (addr_insn); + insn != use_insn; + insn = NEXT_INSN (insn)) + { + /* If we see a call, do not do the PCREL_OPT optimization. */ + if (CALL_P (insn)) + return; + + /* Skip debug insns. */ + if (!NONDEBUG_INSN_P (insn)) + continue; + + /* See if it is a load or store. */ + if (GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER) + { + switch (get_attr_type (insn)) + { + case TYPE_LOAD: + /* While load of the external address is a 'load' for scheduling + purposes, it should be safe to allow loading other external + addresses between the load of the external address we are + currently looking at and the load or store using that + address. */ + if (get_attr_loads_external_address (insn) + == LOADS_EXTERNAL_ADDRESS_YES) + break; + /* fall through */ + + case TYPE_FPLOAD: + case TYPE_VECLOAD: + /* Don't do the PCREL_OPT store optimization if there is a load + operation. For example, the load might be trying to load the + value being stored in between getting the address and doing + the store. */ + if (use_insn_type == TYPE_STORE + || use_insn_type == TYPE_FPSTORE + || use_insn_type == TYPE_VECSTORE) + return; + break; + + case TYPE_STORE: + case TYPE_FPSTORE: + case TYPE_VECSTORE: + /* Don't do the PCREL_OPT load optimization if there is a store + operation. Perhaps the store might be to the global variable + through a pointer. */ + return; + + case TYPE_LOAD_L: + case TYPE_STORE_C: + case TYPE_HTM: + case TYPE_HTMSIMPLE: + /* Don't do the optimization through atomic operations. */ + return; + + default: + break; + } + } + + /* Check for invalid references of the non-address register that is + used in the load or store instruction. */ + if (insn_references_regno_p (insn, use_regno, use_insn_type)) + return; + } + + /* Is this a load or a store? 
*/ + switch (use_insn_type) + { + case TYPE_LOAD: + case TYPE_FPLOAD: + case TYPE_VECLOAD: + pcrel_opt_load (addr_insn, use_insn); + break; + + case TYPE_STORE: + case TYPE_FPSTORE: + case TYPE_VECSTORE: + pcrel_opt_store (addr_insn, use_insn); + break; + + default: + gcc_unreachable (); + } +} + +/* Optimize pcrel external variable references. */ + +static unsigned int +pcrel_opt_pass (function *fun) +{ + basic_block bb; + rtx_insn *insn, *curr_insn = 0; + + memset (&counters, 0, sizeof (counters)); + + /* Dataflow analysis for use-def chains. However we have to specify both UD + and DU as otherwise when we make changes to insns for the PCREL_OPT there + will be dangling references. */ + df_set_flags (DF_RD_PRUNE_DEAD_DEFS); + df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN); + df_note_add_problem (); + df_analyze (); + + /* Set the defer flag as our pattern of operation will be to modify two insns, + then call df_analyze (). */ + df_set_flags (DF_DEFER_INSN_RESCAN | DF_LR_RUN_DCE); + + if (dump_file) + fprintf (dump_file, "\n"); + + /* Look at each basic block to see if there is a load of an external + variable's external address, and a single load/store using that external + address. 
*/ + FOR_ALL_BB_FN (bb, fun) + { + FOR_BB_INSNS_SAFE (bb, insn, curr_insn) + { + if (NONJUMP_INSN_P (insn) + && single_set (insn) + && get_attr_loads_external_address (insn) + == LOADS_EXTERNAL_ADDRESS_YES) + pcrel_opt_address (insn); + } + } + + if (dump_file) + { + fprintf (dump_file, + "\n# of loads of an address of an external symbol = %lu\n", + counters.extern_addrs); + + fprintf (dump_file, "# of PCREL_OPT loads = %lu (adjacent %lu)\n", + counters.loads, counters.adjacent_loads); + + if (counters.failed_loads) + fprintf (dump_file, "# of failed PCREL_OPT loads = %lu\n", + counters.failed_loads); + + fprintf (dump_file, "# of PCREL_OPT stores = %lu (adjacent %lu)\n", + counters.stores, counters.adjacent_stores); + + if (counters.failed_stores) + fprintf (dump_file, "# of failed PCREL_OPT stores = %lu\n", + counters.failed_stores); + + fprintf (dump_file, "\n"); + } + + df_remove_problem (df_chain); + df_process_deferred_rescans (); + df_set_flags (DF_RD_PRUNE_DEAD_DEFS | DF_LR_RUN_DCE); + df_analyze (); + return 0; +} + +/* Optimize pc-relative references for the new PCREL_OPT pass. */ +const pass_data pass_data_pcrel_opt = +{ + RTL_PASS, /* type. */ + "pcrel_opt", /* name. */ + OPTGROUP_NONE, /* optinfo_flags. */ + TV_NONE, /* tv_id. */ + 0, /* properties_required. */ + 0, /* properties_provided. */ + 0, /* properties_destroyed. */ + 0, /* todo_flags_start. */ + TODO_df_finish, /* todo_flags_finish. */ +}; + +/* Pass data structures. 
*/ +class pcrel_opt : public rtl_opt_pass +{ +public: + pcrel_opt (gcc::context *ctxt) + : rtl_opt_pass (pass_data_pcrel_opt, ctxt) + {} + + ~pcrel_opt (void) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return (TARGET_PCREL && TARGET_PCREL_OPT && optimize); + } + + virtual unsigned int execute (function *fun) + { + return pcrel_opt_pass (fun); + } + + opt_pass *clone () + { + return new pcrel_opt (m_ctxt); + } +}; + +rtl_opt_pass * +make_pass_pcrel_opt (gcc::context *ctxt) +{ + return new pcrel_opt (ctxt); +} diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index d9d44fe..203660b 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -193,10 +193,13 @@ extern enum insn_form address_to_insn_form (rtx, machine_mode, enum non_prefixed_form); extern bool address_is_non_pfx_d_or_x (rtx addr, machine_mode mode, enum non_prefixed_form non_prefix_format); +extern bool pcrel_opt_valid_mem_p (rtx, machine_mode, rtx); +enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode); extern bool prefixed_load_p (rtx_insn *); extern bool prefixed_store_p (rtx_insn *); extern bool prefixed_paddi_p (rtx_insn *); extern void rs6000_asm_output_opcode (FILE *); +extern void output_pcrel_opt_reloc (rtx); extern void rs6000_final_prescan_insn (rtx_insn *, rtx [], int); extern int rs6000_adjust_insn_length (rtx_insn *, int); @@ -309,6 +312,7 @@ namespace gcc { class context; } class rtl_opt_pass; extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *); +extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *); extern bool rs6000_sum_of_two_registers_p (const_rtx expr); extern bool rs6000_quadword_masked_address_p (const_rtx exp); extern rtx rs6000_gen_lvx (enum machine_mode, rtx, rtx); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index ec068c5..46ddf49 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1173,7 +1173,6 @@ static 
bool rs6000_secondary_reload_move (enum rs6000_reg_type, machine_mode, secondary_reload_info *, bool); -static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode); rtl_opt_pass *make_pass_analyze_swaps (gcc::context*); /* Hash table stuff for keeping track of TOC entries. */ @@ -3413,9 +3412,10 @@ rs6000_builtin_mask_calculate (void) not such a great idea. */ static rtx_insn * -rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/, - vec<const char *> &/*constraints*/, - vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) +rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, + vec<const char *> & /*constraints*/, vec<rtx> &clobbers, + HARD_REG_SET &clobbered_regs) { clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO)); SET_HARD_REG_BIT (clobbered_regs, CA_REGNO); @@ -4452,6 +4452,9 @@ rs6000_option_override_internal (bool global_init_p) rs6000_isa_flags &= ~OPTION_MASK_MMA; } + if (!TARGET_PCREL && TARGET_PCREL_OPT) + rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT; + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags); @@ -7852,7 +7855,8 @@ rs6000_special_round_type_align (tree type, unsigned int computed, while (TREE_CODE (type) == ARRAY_TYPE) type = TREE_TYPE (type); - if (type != error_mark_node && TYPE_MODE (type) == DFmode) + if (type != error_mark_node + && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode)) align = MAX (align, 64); } @@ -8985,8 +8989,57 @@ rs6000_delegitimize_address (rtx orig_x) { rtx x, y, offset; - if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR) - orig_x = XVECEXP (orig_x, 0, 0); + /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It + encodes loading up the high part of the address of a TOC reference along + with a load of a GPR using the same base register used for the load. We + return the original SYMBOL_REF. 
+ + (set (reg:INT1 <reg> + (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR))) + + UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These + UNSPECs include the external SYMBOL_REF along with the value being loaded. + We return the original SYMBOL_REF. + + (parallel [(set (reg:DI <base-reg>) + (unspec:DI [(symbol_ref <symbol>) + (const_int <marker>)] + UNSPEC_PCREL_OPT_LD_ADDR)) + (set (reg:DI <load-reg>) + (unspec:DI [(const_int 0)] + UNSPEC_PCREL_OPT_LD_DATA))]) + + UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the + GPR being loaded is the same as the GPR used to hold the external address. + + (set (reg:DI <base-reg>) + (unspec:DI [(symbol_ref <symbol>) + (const_int <marker>)] + UNSPEC_PCREL_OPT_LD_SAME_REG)) + + UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This + UNSPEC include the external SYMBOL_REF along with the value being loaded. + We return the original SYMBOL_REF. + + (parallel [(set (reg:DI <base-reg>) + (unspec:DI [(symbol_ref <symbol>) + (const_int <marker>)] + UNSPEC_PCREL_OPT_ST_ADDR)) + (use (reg <store-reg>))]) */ + + if (GET_CODE (orig_x) == UNSPEC) + switch (XINT (orig_x, 1)) + { + case UNSPEC_FUSION_GPR: + case UNSPEC_PCREL_OPT_LD_ADDR: + case UNSPEC_PCREL_OPT_LD_SAME_REG: + case UNSPEC_PCREL_OPT_ST_ADDR: + orig_x = XVECEXP (orig_x, 0, 0); + break; + + default: + break; + } orig_x = delegitimize_mem_from_attrs (orig_x); @@ -10059,6 +10112,9 @@ rs6000_const_vec (machine_mode mode) void rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode) { + gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode)); + gcc_assert (!altivec_indexed_or_indirect_operand (source, mode)); + /* Scalar permutations are easier to express in integer modes rather than floating-point modes, so cast them here. We use V1TImode instead of TImode to ensure that the values don't go through GPRs. 
*/ @@ -21205,8 +21261,6 @@ rs6000_xcoff_file_start (void) main_input_filename, ".ro_"); rs6000_gen_section_name (&xcoff_tls_data_section_name, main_input_filename, ".tls_"); - rs6000_gen_section_name (&xcoff_tbss_section_name, - main_input_filename, ".tbss_[UL]"); fputs ("\t.file\t", asm_out_file); output_quoted_string (asm_out_file, main_input_filename); @@ -23788,6 +23842,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "mulhw", OPTION_MASK_MULHW, false, true }, { "multiple", OPTION_MASK_MULTIPLE, false, true }, { "pcrel", OPTION_MASK_PCREL, false, true }, + { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true }, { "popcntb", OPTION_MASK_POPCNTB, false, true }, { "popcntd", OPTION_MASK_POPCNTD, false, true }, { "power8-fusion", OPTION_MASK_P8_FUSION, false, true }, @@ -25932,6 +25987,32 @@ address_is_non_pfx_d_or_x (rtx addr, machine_mode mode, return false; } +/* Return true if an REG with a given MODE is loaded from or stored into a MEM + location uses a non-prefixed D/DS/DQ-form address. This is used to validate + the load or store with the PCREL_OPT optimization to make sure it is an + instruction that can be optimized. + + We need to specify the MODE separately from the REG to allow for loads that + include zero/sign/float extension. */ + +bool +pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem) +{ + /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the + PCREL_OPT optimization. */ + enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode); + if (non_prefixed == NON_PREFIXED_X) + return false; + + /* Check if this is a non-prefixed D/DS/DQ-form instruction. */ + rtx addr = XEXP (mem, 0); + enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed); + return (iform == INSN_FORM_BASE_REG + || iform == INSN_FORM_D + || iform == INSN_FORM_DS + || iform == INSN_FORM_DQ); +} + /* Helper function to see if we're potentially looking at lfs/stfs. 
- PARALLEL containing a SET and a CLOBBER - stfs: @@ -25990,7 +26071,7 @@ is_lfs_stfs_insn (rtx_insn *insn) /* Helper function to take a REG and a MODE and turn it into the non-prefixed instruction format (D/DS/DQ) used for offset memory. */ -static enum non_prefixed_form +enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode) { /* If it isn't a register, use the defaults. */ @@ -26191,7 +26272,7 @@ prefixed_paddi_p (rtx_insn *insn) /* Whether the next instruction needs a 'p' prefix issued before the instruction is printed out. */ -static bool next_insn_prefixed_p; +static bool prepend_p_to_next_insn; /* Define FINAL_PRESCAN_INSN if some processing needs to be done before outputting the assembler code. On the PowerPC, we remember if the current @@ -26202,7 +26283,7 @@ static bool next_insn_prefixed_p; void rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int) { - next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO); + prepend_p_to_next_insn = (get_attr_prefixed (insn) != PREFIXED_NO); return; } @@ -26212,12 +26293,35 @@ rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int) void rs6000_asm_output_opcode (FILE *stream) { - if (next_insn_prefixed_p) - fprintf (stream, "p"); + if (prepend_p_to_next_insn) + { + fprintf (stream, "p"); + + /* Reset the flag in the case where there are separate insn lines in the + sequence, so the 'p' is only emitted for the first line. This shows up + when we are doing the PCREL_OPT optimization, in that the label created + with %r<n> would have a leading 'p' printed. */ + prepend_p_to_next_insn = false; + } return; } +/* Emit the relocation to tie the next instruction to a previous instruction + that loads up an external address. This is used to do the PCREL_OPT + optimization. Note, the label is generated after the PLD of the got + pc-relative address to allow for the assembler to insert NOPs before the PLD + instruction. The operand is a constant integer that is the label + number. 
*/ + +void +output_pcrel_opt_reloc (rtx label_num) +{ + rtx operands[1] = { label_num }; + output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)", + operands); +} + /* Adjust the length of an INSN. LENGTH is the currently-computed length and should be adjusted to reflect any required changes. This macro is used when there is some systematic length adjustment required that would be difficult diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index a131552..c0d7b1a 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -209,7 +209,7 @@ ;; What data size does this instruction work on? ;; This is used for insert, mul and others as necessary. -(define_attr "size" "8,16,32,64,128" (const_string "32")) +(define_attr "size" "8,16,32,64,128,256" (const_string "32")) ;; What is the insn_cost for this insn? The target hook can still override ;; this. For optimizing for size the "length" attribute is used instead. @@ -292,6 +292,10 @@ (const_string "no"))) +;; Whether an insn loads an external address for the PCREL_OPT optimizaton. +(define_attr "loads_external_address" "no,yes" + (const_string "no")) + ;; Return the number of real hardware instructions in a combined insn. If it ;; is 0, just use the length / 4. (define_attr "num_insns" "" (const_int 0)) @@ -671,6 +675,7 @@ ;; How many bits (per element) in this mode? (define_mode_attr bits [(QI "8") (HI "16") (SI "32") (DI "64") (SF "32") (DF "64") + (DD "64") (TD "128") (V4SI "32") (V2DI "64")]) ; DImode bits @@ -4068,7 +4073,7 @@ [(set_attr "type" "insert")]) ; There are also some forms without one of the ANDs. 
-(define_insn "*rotl<mode>3_insert_3" +(define_insn "rotl<mode>3_insert_3" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (ior:GPR (and:GPR (match_operand:GPR 3 "gpc_reg_operand" "0") (match_operand:GPR 4 "const_int_operand" "n")) @@ -4083,6 +4088,24 @@ } [(set_attr "type" "insert")]) +(define_code_iterator plus_ior_xor [plus ior xor]) + +(define_split + [(set (match_operand:GPR 0 "gpc_reg_operand") + (plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand") + (match_operand:SI 2 "const_int_operand")) + (match_operand:GPR 3 "gpc_reg_operand")))] + "nonzero_bits (operands[3], <MODE>mode) + < HOST_WIDE_INT_1U << INTVAL (operands[2])" + [(set (match_dup 0) + (ior:GPR (and:GPR (match_dup 3) + (match_dup 4)) + (ashift:GPR (match_dup 1) + (match_dup 2))))] +{ + operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1); +}) + (define_insn "*rotl<mode>3_insert_4" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (ior:GPR (and:GPR (match_operand:GPR 3 "gpc_reg_operand" "0") @@ -9991,7 +10014,7 @@ (unspec:SI [(const_int 0)] UNSPEC_TLSTLS)) (clobber (reg:SI LR_REGNO))] "TARGET_XCOFF && HAVE_AS_TLS" - "bla __get_tpointer") + "bla .__get_tpointer") (define_expand "tls_get_addr<mode>" [(set (match_operand:P 0 "gpc_reg_operand") @@ -10016,7 +10039,7 @@ (clobber (reg:CC CR0_REGNO)) (clobber (reg:P LR_REGNO))] "TARGET_XCOFF && HAVE_AS_TLS" - "bla __tls_get_addr") + "bla .__tls_get_addr") ;; Next come insns related to the calling sequence. ;; @@ -10243,7 +10266,8 @@ "TARGET_PCREL" "ld %0,%a1" [(set_attr "prefixed" "yes") - (set_attr "type" "load")]) + (set_attr "type" "load") + (set_attr "loads_external_address" "yes")]) ;; TOC register handling. 
@@ -14928,3 +14952,4 @@ (include "crypto.md") (include "htm.md") (include "fusion.md") +(include "pcrel-opt.md") diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index ae9e91e..0dbdf75 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -609,6 +609,10 @@ mpcrel Target Mask(PCREL) Var(rs6000_isa_flags) Generate (do not generate) pc-relative memory addressing. +mpcrel-opt +Target Undocumented Mask(PCREL_OPT) Var(rs6000_isa_flags) +Generate (do not generate) pc-relative memory optimizations for externals. + mmma Target Mask(MMA) Var(rs6000_isa_flags) Generate (do not generate) MMA instructions. diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md index 11e4c03..40629dd 100644 --- a/gcc/config/rs6000/sync.md +++ b/gcc/config/rs6000/sync.md @@ -131,6 +131,7 @@ && !reg_mentioned_p (operands[0], operands[1])" "lq %0,%1" [(set_attr "type" "load") + (set_attr "size" "128") (set (attr "prefixed") (if_then_else (match_test "TARGET_PREFIXED") (const_string "yes") (const_string "no")))]) @@ -205,6 +206,7 @@ "TARGET_SYNC_TI" "stq %1,%0" [(set_attr "type" "store") + (set_attr "size" "128") (set (attr "prefixed") (if_then_else (match_test "TARGET_PREFIXED") (const_string "yes") (const_string "no")))]) @@ -333,7 +335,8 @@ && !reg_mentioned_p (operands[0], operands[1]) && quad_int_reg_operand (operands[0], PTImode)" "lqarx %0,%y1" - [(set_attr "type" "load_l")]) + [(set_attr "type" "load_l") + (set_attr "size" "128")]) (define_insn "store_conditional<mode>" [(set (match_operand:CC 0 "cc_reg_operand" "=x") @@ -394,7 +397,8 @@ (match_operand:PTI 2 "quad_int_reg_operand" "r"))] "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)" "stqcx. 
%2,%y1" - [(set_attr "type" "store_c")]) + [(set_attr "type" "store_c") + (set_attr "size" "128")]) (define_expand "atomic_compare_and_swap<mode>" [(match_operand:SI 0 "int_reg_operand") ;; bool out diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000 index 1541a65..44f7ffb 100644 --- a/gcc/config/rs6000/t-rs6000 +++ b/gcc/config/rs6000/t-rs6000 @@ -23,6 +23,10 @@ TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def TM_H += $(srcdir)/config/rs6000/rs6000-modes.h PASSES_EXTRA += $(srcdir)/config/rs6000/rs6000-passes.def +rs6000-pcrel-opt.o: $(srcdir)/config/rs6000/rs6000-pcrel-opt.c + $(COMPILE) $< + $(POSTCOMPILE) + rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c $(COMPILE) $< $(POSTCOMPILE) @@ -90,4 +94,5 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \ $(srcdir)/config/rs6000/crypto.md \ $(srcdir)/config/rs6000/htm.md \ $(srcdir)/config/rs6000/dfp.md \ - $(srcdir)/config/rs6000/fusion.md + $(srcdir)/config/rs6000/fusion.md \ + $(srcdir)/config/rs6000/pcrel-opt.md diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 3e05186..a1fa4f9 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -987,11 +987,13 @@ (define_insn_and_split "*vsx_le_perm_load_<mode>" [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r") (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))] - "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR + && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)" "@ # #" - "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR + && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)" [(const_int 0)] { rtx tmp = (can_create_pseudo_p () @@ -1008,7 +1010,8 @@ (define_insn "*vsx_le_perm_store_<mode>" [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q") (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))] - "!BYTES_BIG_ENDIAN && TARGET_VSX && 
!TARGET_P9_VECTOR" + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR + & !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)" "@ # #" @@ -1019,7 +1022,8 @@ (define_split [(set (match_operand:VSX_LE_128 0 "memory_operand") (match_operand:VSX_LE_128 1 "vsx_register_operand"))] - "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR" + "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR + && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)" [(const_int 0)] { rtx tmp = (can_create_pseudo_p () @@ -1075,7 +1079,8 @@ (define_split [(set (match_operand:VSX_LE_128 0 "memory_operand") (match_operand:VSX_LE_128 1 "vsx_register_operand"))] - "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR" + "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR + && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)" [(const_int 0)] { rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode); @@ -1241,7 +1246,8 @@ "VECTOR_MEM_VSX_P (<MODE>mode)" { /* Expand to swaps if needed, prior to swap optimization. */ - if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR) + if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR + && !altivec_indexed_or_indirect_operand(operands[1], <MODE>mode)) { rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode); DONE; @@ -1254,7 +1260,8 @@ "VECTOR_MEM_VSX_P (<MODE>mode)" { /* Expand to swaps if needed, prior to swap optimization. 
*/ - if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR) + if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR + && !altivec_indexed_or_indirect_operand(operands[0], <MODE>mode)) { rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode); DONE; @@ -3030,28 +3037,22 @@ (use (match_operand:SI 4 "gpc_reg_operand"))] "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" { - rtx a = gen_reg_rtx (DImode); - rtx b = gen_reg_rtx (DImode); - rtx c = gen_reg_rtx (DImode); - rtx d = gen_reg_rtx (DImode); - emit_insn (gen_zero_extendsidi2 (a, operands[1])); - emit_insn (gen_zero_extendsidi2 (b, operands[2])); - emit_insn (gen_zero_extendsidi2 (c, operands[3])); - emit_insn (gen_zero_extendsidi2 (d, operands[4])); + rtx a = gen_lowpart_SUBREG (DImode, operands[1]); + rtx b = gen_lowpart_SUBREG (DImode, operands[2]); + rtx c = gen_lowpart_SUBREG (DImode, operands[3]); + rtx d = gen_lowpart_SUBREG (DImode, operands[4]); if (!BYTES_BIG_ENDIAN) { std::swap (a, b); std::swap (c, d); } - rtx aa = gen_reg_rtx (DImode); rtx ab = gen_reg_rtx (DImode); - rtx cc = gen_reg_rtx (DImode); rtx cd = gen_reg_rtx (DImode); - emit_insn (gen_ashldi3 (aa, a, GEN_INT (32))); - emit_insn (gen_ashldi3 (cc, c, GEN_INT (32))); - emit_insn (gen_iordi3 (ab, aa, b)); - emit_insn (gen_iordi3 (cd, cc, d)); + emit_insn (gen_rotldi3_insert_3 (ab, a, GEN_INT (32), b, + GEN_INT (0xffffffff))); + emit_insn (gen_rotldi3_insert_3 (cd, c, GEN_INT (32), d, + GEN_INT (0xffffffff))); rtx abcd = gen_reg_rtx (V2DImode); emit_insn (gen_vsx_concat_v2di (abcd, ab, cd)); diff --git a/gcc/config/rs6000/xcoff.h b/gcc/config/rs6000/xcoff.h index c016678..cb9aae7 100644 --- a/gcc/config/rs6000/xcoff.h +++ b/gcc/config/rs6000/xcoff.h @@ -255,11 +255,11 @@ } while (0) #ifdef HAVE_AS_TLS -#define ASM_OUTPUT_TLS_COMMON(FILE, DECL, NAME, SIZE) \ - do { fputs (COMMON_ASM_OP, (FILE)); \ - RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \ - fprintf ((FILE), "[UL]," HOST_WIDE_INT_PRINT_UNSIGNED"\n", \ - (SIZE)); \ +#define ASM_OUTPUT_TLS_COMMON(FILE, 
DECL, NAME, SIZE) \ + do { fputs (LOCAL_COMMON_ASM_OP, (FILE)); \ + fprintf ((FILE), "%s," HOST_WIDE_INT_PRINT_UNSIGNED",%s[UL],3\n", \ + (*targetm.strip_name_encoding) (NAME), (SIZE), \ + (*targetm.strip_name_encoding) (NAME)); \ } while (0) #endif diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h index 8e23e31..4078440 100644 --- a/gcc/config/rx/rx.h +++ b/gcc/config/rx/rx.h @@ -629,6 +629,9 @@ typedef unsigned int CUMULATIVE_ARGS; #define PREFERRED_DEBUGGING_TYPE (TARGET_AS100_SYNTAX \ ? DBX_DEBUG : DWARF2_DEBUG) +#define DBX_DEBUGGING_INFO 1 +#define DWARF2_DEBUGGING_INFO 1 + #define INCOMING_FRAME_SP_OFFSET 4 #define ARG_POINTER_CFA_OFFSET(FNDECL) 4 diff --git a/gcc/config/s390/driver-native.c b/gcc/config/s390/driver-native.c index 4a065a5..c024715 100644 --- a/gcc/config/s390/driver-native.c +++ b/gcc/config/s390/driver-native.c @@ -124,7 +124,7 @@ s390_host_detect_local_cpu (int argc, const char **argv) cpu = "z15"; break; default: - cpu = "z15"; + cpu = "arch14"; break; } } diff --git a/gcc/config/s390/s390-builtin-types.def b/gcc/config/s390/s390-builtin-types.def index a2b7d4a..52ef572 100644 --- a/gcc/config/s390/s390-builtin-types.def +++ b/gcc/config/s390/s390-builtin-types.def @@ -267,6 +267,7 @@ DEF_FN_TYPE_2 (BT_FN_V2DI_V4SI_V4SI, BT_V2DI, BT_V4SI, BT_V4SI) DEF_FN_TYPE_2 (BT_FN_V4SF_FLT_INT, BT_V4SF, BT_FLT, BT_INT) DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_UCHAR, BT_V4SF, BT_V4SF, BT_UCHAR) DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_V4SF, BT_V4SF, BT_V4SF, BT_V4SF) +DEF_FN_TYPE_2 (BT_FN_V4SF_V8HI_UINT, BT_V4SF, BT_V8HI, BT_UINT) DEF_FN_TYPE_2 (BT_FN_V4SI_BV4SI_V4SI, BT_V4SI, BT_BV4SI, BT_V4SI) DEF_FN_TYPE_2 (BT_FN_V4SI_INT_VOIDCONSTPTR, BT_V4SI, BT_INT, BT_VOIDCONSTPTR) DEF_FN_TYPE_2 (BT_FN_V4SI_UV4SI_UV4SI, BT_V4SI, BT_UV4SI, BT_UV4SI) @@ -278,6 +279,7 @@ DEF_FN_TYPE_2 (BT_FN_V8HI_BV8HI_V8HI, BT_V8HI, BT_BV8HI, BT_V8HI) DEF_FN_TYPE_2 (BT_FN_V8HI_UV8HI_UV8HI, BT_V8HI, BT_UV8HI, BT_UV8HI) DEF_FN_TYPE_2 (BT_FN_V8HI_V16QI_V16QI, BT_V8HI, BT_V16QI, BT_V16QI) 
DEF_FN_TYPE_2 (BT_FN_V8HI_V4SI_V4SI, BT_V8HI, BT_V4SI, BT_V4SI) +DEF_FN_TYPE_2 (BT_FN_V8HI_V8HI_UINT, BT_V8HI, BT_V8HI, BT_UINT) DEF_FN_TYPE_2 (BT_FN_V8HI_V8HI_V8HI, BT_V8HI, BT_V8HI, BT_V8HI) DEF_FN_TYPE_2 (BT_FN_VOID_UINT64PTR_UINT64, BT_VOID, BT_UINT64PTR, BT_UINT64) DEF_FN_TYPE_2 (BT_FN_VOID_V2DF_FLTPTR, BT_VOID, BT_V2DF, BT_FLTPTR) @@ -345,6 +347,7 @@ DEF_FN_TYPE_3 (BT_FN_V4SI_V4SI_V4SI_V4SI, BT_V4SI, BT_V4SI, BT_V4SI, BT_V4SI) DEF_FN_TYPE_3 (BT_FN_V4SI_V8HI_V8HI_V4SI, BT_V4SI, BT_V8HI, BT_V8HI, BT_V4SI) DEF_FN_TYPE_3 (BT_FN_V8HI_UV8HI_UV8HI_INTPTR, BT_V8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR) DEF_FN_TYPE_3 (BT_FN_V8HI_V16QI_V16QI_V8HI, BT_V8HI, BT_V16QI, BT_V16QI, BT_V8HI) +DEF_FN_TYPE_3 (BT_FN_V8HI_V4SF_V4SF_UINT, BT_V8HI, BT_V4SF, BT_V4SF, BT_UINT) DEF_FN_TYPE_3 (BT_FN_V8HI_V4SI_V4SI_INTPTR, BT_V8HI, BT_V4SI, BT_V4SI, BT_INTPTR) DEF_FN_TYPE_3 (BT_FN_V8HI_V8HI_V8HI_INTPTR, BT_V8HI, BT_V8HI, BT_V8HI, BT_INTPTR) DEF_FN_TYPE_3 (BT_FN_V8HI_V8HI_V8HI_V8HI, BT_V8HI, BT_V8HI, BT_V8HI, BT_V8HI) diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def index deb205b..129d712 100644 --- a/gcc/config/s390/s390-builtins.def +++ b/gcc/config/s390/s390-builtins.def @@ -273,6 +273,7 @@ #undef B_VXE #undef B_VXE2 #undef B_DEP +#undef B_NNPA #undef BFLAGS_MASK_INIT #define BFLAGS_MASK_INIT (B_INT) @@ -283,6 +284,7 @@ #define B_VXE (1 << 3) /* Builtins requiring the z14 vector extensions. */ #define B_VXE2 (1 << 4) /* Builtins requiring the z15 vector extensions. */ #define B_DEP (1 << 5) /* Builtin has been deprecated and a warning should be issued. */ +#define B_NNPA (1 << 6) /* Builtins requiring the NNPA Facility. 
*/ /* B_DEF defines a standard (not overloaded) builtin B_DEF (<builtin name>, <RTL expander name>, <function attributes>, <builtin flags>, <operand flags, see above>, <fntype>) @@ -3005,3 +3007,13 @@ OB_DEF_VAR (s390_vstrsz_u32, s390_vstrszf, 0, B_DEF (s390_vstrszb, vstrszv16qi, 0, B_VXE2, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR) B_DEF (s390_vstrszh, vstrszv8hi, 0, B_VXE2, 0, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR) B_DEF (s390_vstrszf, vstrszv4si, 0, B_VXE2, 0, BT_FN_UV4SI_UV4SI_UV4SI_UV8HI_INTPTR) + +/* arch 14 builtins */ + +B_DEF (s390_vclfnhs, vclfnhs_v8hi, 0, B_NNPA, O3_U4, BT_FN_V4SF_V8HI_UINT) +B_DEF (s390_vclfnls, vclfnls_v8hi, 0, B_NNPA, O3_U4, BT_FN_V4SF_V8HI_UINT) + +B_DEF (s390_vcrnfs, vcrnfs_v8hi, 0, B_NNPA, O4_U4, BT_FN_V8HI_V4SF_V4SF_UINT) + +B_DEF (s390_vcfn, vcfn_v8hi, 0, B_NNPA, O3_U4, BT_FN_V8HI_V8HI_UINT) +B_DEF (s390_vcnf, vcnf_v8hi, 0, B_NNPA, O3_U4, BT_FN_V8HI_V8HI_UINT) diff --git a/gcc/config/s390/s390-c.c b/gcc/config/s390/s390-c.c index a5f5f56..7dbd8bf 100644 --- a/gcc/config/s390/s390-c.c +++ b/gcc/config/s390/s390-c.c @@ -339,7 +339,7 @@ s390_cpu_cpp_builtins_internal (cpp_reader *pfile, s390_def_or_undef_macro (pfile, target_flag_set_p (MASK_OPT_VX), old_opts, opts, "__VX__", "__VX__"); s390_def_or_undef_macro (pfile, target_flag_set_p (MASK_ZVECTOR), old_opts, - opts, "__VEC__=10303", "__VEC__"); + opts, "__VEC__=10304", "__VEC__"); s390_def_or_undef_macro (pfile, target_flag_set_p (MASK_ZVECTOR), old_opts, opts, "__vector=__attribute__((vector_size(16)))", "__vector__"); diff --git a/gcc/config/s390/s390-opts.h b/gcc/config/s390/s390-opts.h index d575180..4141b4d 100644 --- a/gcc/config/s390/s390-opts.h +++ b/gcc/config/s390/s390-opts.h @@ -38,6 +38,7 @@ enum processor_type PROCESSOR_2964_Z13, PROCESSOR_3906_Z14, PROCESSOR_8561_Z15, + PROCESSOR_ARCH14, PROCESSOR_NATIVE, PROCESSOR_max }; diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 9d2cee9..151136b 100644 --- a/gcc/config/s390/s390.c +++ 
b/gcc/config/s390/s390.c @@ -337,6 +337,7 @@ const struct s390_processor processor_table[] = { "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost, 11 }, { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 }, { "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 }, + { "arch14", "arch14", PROCESSOR_ARCH14, &zEC12_cost, 14 }, { "native", "", PROCESSOR_NATIVE, NULL, 0 } }; @@ -826,6 +827,12 @@ s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, error ("Builtin %qF requires z15 or higher.", fndecl); return const0_rtx; } + + if ((bflags & B_NNPA) && !TARGET_NNPA) + { + error ("Builtin %qF requires arch14 or higher.", fndecl); + return const0_rtx; + } } if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET && fcode < S390_ALL_BUILTIN_MAX) @@ -6562,6 +6569,7 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond, if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT) { + cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2); switch (cond) { /* NE a != b -> !(a == b) */ @@ -6600,6 +6608,19 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond, } else { + /* Turn x < 0 into x >> (bits per element - 1) */ + if (cond == LT && cmp_op2 == CONST0_RTX (mode)) + { + int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1; + rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1, + GEN_INT (shift), target, + 0, OPTAB_DIRECT); + if (res != target) + emit_move_insn (target, res); + return; + } + cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2); + switch (cond) { /* NE: a != b -> !(a == b) */ @@ -6817,11 +6838,7 @@ s390_expand_vcond (rtx target, rtx then, rtx els, if (!REG_P (cmp_op1)) cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1); - if (!REG_P (cmp_op2)) - cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2); - - s390_expand_vec_compare (result_target, cond, - cmp_op1, cmp_op2); + s390_expand_vec_compare (result_target, cond, cmp_op1, cmp_op2); /* If the results are supposed to be either -1 or 0 we are done since this is what our compare instructions 
generate anyway. */ @@ -8409,6 +8426,7 @@ s390_issue_rate (void) case PROCESSOR_2827_ZEC12: case PROCESSOR_2964_Z13: case PROCESSOR_3906_Z14: + case PROCESSOR_ARCH14: default: return 1; } @@ -14768,6 +14786,7 @@ s390_get_sched_attrmask (rtx_insn *insn) mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO; break; case PROCESSOR_8561_Z15: + case PROCESSOR_ARCH14: if (get_attr_z15_cracked (insn)) mask |= S390_SCHED_ATTR_MASK_CRACKED; if (get_attr_z15_expanded (insn)) @@ -14815,6 +14834,7 @@ s390_get_unit_mask (rtx_insn *insn, int *units) mask |= 1 << 3; break; case PROCESSOR_8561_Z15: + case PROCESSOR_ARCH14: *units = 4; if (get_attr_z15_unit_lsu (insn)) mask |= 1 << 0; @@ -16688,6 +16708,89 @@ s390_shift_truncation_mask (machine_mode mode) return mode == DImode || mode == SImode ? 63 : 0; } +/* Return TRUE iff CONSTRAINT is an "f" constraint, possibly with additional + modifiers. */ + +static bool +f_constraint_p (const char *constraint) +{ + for (size_t i = 0, c_len = strlen (constraint); i < c_len; + i += CONSTRAINT_LEN (constraint[i], constraint + i)) + { + if (constraint[i] == 'f') + return true; + } + return false; +} + +/* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f" + constraints when long doubles are stored in vector registers. */ + +static rtx_insn * +s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, + vec<machine_mode> &input_modes, + vec<const char *> &constraints, vec<rtx> & /*clobbers*/, + HARD_REG_SET & /*clobbered_regs*/) +{ + if (!TARGET_VXE) + /* Long doubles are stored in FPR pairs - nothing to do. */ + return NULL; + + rtx_insn *after_md_seq = NULL, *after_md_end = NULL; + + unsigned ninputs = inputs.length (); + unsigned noutputs = outputs.length (); + for (unsigned i = 0; i < noutputs; i++) + { + if (GET_MODE (outputs[i]) != TFmode) + /* Not a long double - nothing to do. 
*/ + continue; + const char *constraint = constraints[i]; + bool allows_mem, allows_reg, is_inout; + bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs, + &allows_mem, &allows_reg, &is_inout); + gcc_assert (ok); + if (!f_constraint_p (constraint)) + /* Long double with a constraint other than "=f" - nothing to do. */ + continue; + gcc_assert (allows_reg); + gcc_assert (!is_inout); + /* Copy output value from a FPR pair into a vector register. */ + rtx fprx2 = gen_reg_rtx (FPRX2mode); + push_to_sequence2 (after_md_seq, after_md_end); + emit_insn (gen_fprx2_to_tf (outputs[i], fprx2)); + after_md_seq = get_insns (); + after_md_end = get_last_insn (); + end_sequence (); + outputs[i] = fprx2; + } + + for (unsigned i = 0; i < ninputs; i++) + { + if (GET_MODE (inputs[i]) != TFmode) + /* Not a long double - nothing to do. */ + continue; + const char *constraint = constraints[noutputs + i]; + bool allows_mem, allows_reg; + bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0, + constraints.address (), &allows_mem, + &allows_reg); + gcc_assert (ok); + if (!f_constraint_p (constraint)) + /* Long double with a constraint other than "f" (or "=f" for inout + operands) - nothing to do. */ + continue; + gcc_assert (allows_reg); + /* Copy input value from a vector register into a FPR pair. */ + rtx fprx2 = gen_reg_rtx (FPRX2mode); + emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i])); + inputs[i] = fprx2; + input_modes[i] = FPRX2mode; + } + + return after_md_seq; +} + /* Initialize GCC target structure. 
*/ #undef TARGET_ASM_ALIGNED_HI_OP @@ -16995,6 +17098,9 @@ s390_shift_truncation_mask (machine_mode mode) #undef TARGET_MAX_ANCHOR_OFFSET #define TARGET_MAX_ANCHOR_OFFSET 0xfff +#undef TARGET_MD_ASM_ADJUST +#define TARGET_MD_ASM_ADJUST s390_md_asm_adjust + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-s390.h" diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index 2da768d..991af96 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -41,7 +41,9 @@ enum processor_flags PF_Z14 = 2048, PF_VXE = 4096, PF_VXE2 = 8192, - PF_Z15 = 16384 + PF_Z15 = 16384, + PF_NNPA = 32768, + PF_ARCH14 = 65536 }; /* This is necessary to avoid a warning about comparing different enum @@ -108,6 +110,14 @@ enum processor_flags (s390_arch_flags & PF_VXE2) #define TARGET_CPU_VXE2_P(opts) \ (opts->x_s390_arch_flags & PF_VXE2) +#define TARGET_CPU_ARCH14 \ + (s390_arch_flags & PF_ARCH14) +#define TARGET_CPU_ARCH14_P(opts) \ + (opts->x_s390_arch_flags & PF_ARCH14) +#define TARGET_CPU_NNPA \ + (s390_arch_flags & PF_NNPA) +#define TARGET_CPU_NNPA_P(opts) \ + (opts->x_s390_arch_flags & PF_NNPA) #define TARGET_HARD_FLOAT_P(opts) (!TARGET_SOFT_FLOAT_P(opts)) @@ -167,6 +177,14 @@ enum processor_flags (TARGET_VX && TARGET_CPU_VXE2) #define TARGET_VXE2_P(opts) \ (TARGET_VX_P (opts) && TARGET_CPU_VXE2_P (opts)) +#define TARGET_ARCH14 (TARGET_ZARCH && TARGET_CPU_ARCH14) +#define TARGET_ARCH14_P(opts) \ + (TARGET_ZARCH_P (opts->x_target_flags) && TARGET_CPU_ARCH14_P (opts)) +#define TARGET_NNPA \ + (TARGET_ZARCH && TARGET_CPU_NNPA) +#define TARGET_NNPA_P(opts) \ + (TARGET_ZARCH_P (opts) && TARGET_CPU_NNPA_P (opts)) + #if defined(HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS_ON_Z13) #define TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS TARGET_Z13 #elif defined(HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS) diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 3f96f5f..c10f25b 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -246,6 +246,13 @@ 
UNSPEC_VEC_VFMAX UNSPEC_VEC_ELTSWAP + + UNSPEC_NNPA_VCLFNHS_V8HI + UNSPEC_NNPA_VCLFNLS_V8HI + UNSPEC_NNPA_VCRNFS_V8HI + + UNSPEC_NNPA_VCFN_V8HI + UNSPEC_NNPA_VCNF_V8HI ]) ;; @@ -518,7 +525,7 @@ (const (symbol_ref "s390_tune_attr"))) (define_attr "cpu_facility" - "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,z15,vxe2" + "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,z15,vxe2,arch14,nnpa" (const_string "standard")) (define_attr "enabled" "" @@ -583,7 +590,15 @@ (and (eq_attr "cpu_facility" "vxe2") (match_test "TARGET_VXE2")) (const_int 1) - ] + + (and (eq_attr "cpu_facility" "arch14") + (match_test "TARGET_ARCH14")) + (const_int 1) + + (and (eq_attr "cpu_facility" "nnpa") + (match_test "TARGET_NNPA")) + (const_int 1) +] (const_int 0))) ;; Whether an instruction supports relative long addressing. diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt index de7207e..1027f6a 100644 --- a/gcc/config/s390/s390.opt +++ b/gcc/config/s390/s390.opt @@ -116,6 +116,9 @@ EnumValue Enum(processor_type) String(arch13) Value(PROCESSOR_8561_Z15) EnumValue +Enum(processor_type) String(arch14) Value(PROCESSOR_ARCH14) + +EnumValue Enum(processor_type) String(native) Value(PROCESSOR_NATIVE) DriverOnly mbackchain diff --git a/gcc/config/s390/vecintrin.h b/gcc/config/s390/vecintrin.h index cbc8f4d..6bd26f8 100644 --- a/gcc/config/s390/vecintrin.h +++ b/gcc/config/s390/vecintrin.h @@ -173,6 +173,12 @@ __lcbb(const void *ptr, int bndry) #define vec_vsterg vec_vlerh #define vec_vsterf_flt vec_vlerf_flt #define vec_vsterg_dbl vec_vlerg_dbl + +#define vec_extend_to_fp32_hi __builtin_s390_vclfnhs +#define vec_extend_to_fp32_lo __builtin_s390_vclfnls +#define vec_round_from_fp32 __builtin_s390_vcrnfs +#define vec_convert_to_fp16 __builtin_s390_vcfn +#define vec_convert_from_fp16 __builtin_s390_vcnf #define vec_gather_element __builtin_s390_vec_gather_element #define vec_xl __builtin_s390_vec_xl #define vec_xld2 
__builtin_s390_vec_xld2 diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 0e3c31f..c80d582 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -616,12 +616,23 @@ vlvgp\t%v0,%1,%N1" [(set_attr "op_type" "VRR,VRX,VRX,VRI,VRR")]) -(define_insn "*fprx2_to_tf" - [(set (match_operand:TF 0 "nonimmediate_operand" "=v") - (subreg:TF (match_operand:FPRX2 1 "general_operand" "f") 0))] +(define_insn_and_split "fprx2_to_tf" + [(set (match_operand:TF 0 "nonimmediate_operand" "=v,AR") + (subreg:TF (match_operand:FPRX2 1 "general_operand" "f,f") 0))] "TARGET_VXE" - "vmrhg\t%v0,%1,%N1" - [(set_attr "op_type" "VRR")]) + "@ + vmrhg\t%v0,%1,%N1 + #" + "!(MEM_P (operands[0]) && MEM_VOLATILE_P (operands[0]))" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + operands[2] = simplify_gen_subreg (DFmode, operands[0], TFmode, 0); + operands[3] = simplify_gen_subreg (DFmode, operands[1], FPRX2mode, 0); + operands[4] = simplify_gen_subreg (DFmode, operands[0], TFmode, 8); + operands[5] = simplify_gen_subreg (DFmode, operands[1], FPRX2mode, 8); +} + [(set_attr "op_type" "VRR,*")]) (define_insn "*vec_ti_to_v1ti" [(set (match_operand:V1TI 0 "nonimmediate_operand" "=v,v,R, v, v,v") @@ -753,6 +764,21 @@ "vpdi\t%V0,%v1,%V0,5" [(set_attr "op_type" "VRR")]) +(define_insn_and_split "tf_to_fprx2" + [(set (match_operand:FPRX2 0 "nonimmediate_operand" "=f,f") + (subreg:FPRX2 (match_operand:TF 1 "general_operand" "v,AR") 0))] + "TARGET_VXE" + "#" + "!(MEM_P (operands[1]) && MEM_VOLATILE_P (operands[1]))" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + operands[2] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 0); + operands[3] = simplify_gen_subreg (DFmode, operands[1], TFmode, 0); + operands[4] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 8); + operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8); +}) + ; vec_perm_const for V2DI using vpdi? 
;; @@ -1563,7 +1589,7 @@ [(set (match_operand:<TOINTVEC> 0 "register_operand" "") (match_operator:<TOINTVEC> 1 "vcond_comparison_operator" [(match_operand:V_HW 2 "register_operand" "") - (match_operand:V_HW 3 "register_operand" "")]))] + (match_operand:V_HW 3 "nonmemory_operand" "")]))] "TARGET_VX" { s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], operands[3]); @@ -2454,6 +2480,42 @@ "HAVE_TF (trunctfsf2)" { EXPAND_TF (trunctfsf2, 2); }) +(define_expand "trunctf<DFP_ALL:mode>2_vr" + [(match_operand:DFP_ALL 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" "")] + "TARGET_HARD_DFP + && GET_MODE_SIZE (TFmode) > GET_MODE_SIZE (<DFP_ALL:MODE>mode) + && TARGET_VXE" +{ + rtx fprx2 = gen_reg_rtx (FPRX2mode); + emit_insn (gen_tf_to_fprx2 (fprx2, operands[1])); + emit_insn (gen_truncfprx2<DFP_ALL:mode>2 (operands[0], fprx2)); + DONE; +}) + +(define_expand "trunctf<DFP_ALL:mode>2" + [(match_operand:DFP_ALL 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" "")] + "HAVE_TF (trunctf<DFP_ALL:mode>2)" + { EXPAND_TF (trunctf<DFP_ALL:mode>2, 2); }) + +(define_expand "trunctdtf2_vr" + [(match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TD 1 "nonimmediate_operand" "")] + "TARGET_HARD_DFP && TARGET_VXE" +{ + rtx fprx2 = gen_reg_rtx (FPRX2mode); + emit_insn (gen_trunctdfprx22 (fprx2, operands[1])); + emit_insn (gen_fprx2_to_tf (operands[0], fprx2)); + DONE; +}) + +(define_expand "trunctdtf2" + [(match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TD 1 "nonimmediate_operand" "")] + "HAVE_TF (trunctdtf2)" + { EXPAND_TF (trunctdtf2, 2); }) + ; load lengthened (define_insn "extenddftf2_vr" @@ -2485,6 +2547,42 @@ "HAVE_TF (extendsftf2)" { EXPAND_TF (extendsftf2, 2); }) +(define_expand "extend<DFP_ALL:mode>tf2_vr" + [(match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:DFP_ALL 1 "nonimmediate_operand" "")] + "TARGET_HARD_DFP + && GET_MODE_SIZE (<DFP_ALL:MODE>mode) < GET_MODE_SIZE 
(TFmode) + && TARGET_VXE" +{ + rtx fprx2 = gen_reg_rtx (FPRX2mode); + emit_insn (gen_extend<DFP_ALL:mode>fprx22 (fprx2, operands[1])); + emit_insn (gen_fprx2_to_tf (operands[0], fprx2)); + DONE; +}) + +(define_expand "extend<DFP_ALL:mode>tf2" + [(match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:DFP_ALL 1 "nonimmediate_operand" "")] + "HAVE_TF (extend<DFP_ALL:mode>tf2)" + { EXPAND_TF (extend<DFP_ALL:mode>tf2, 2); }) + +(define_expand "extendtftd2_vr" + [(match_operand:TD 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" "")] + "TARGET_HARD_DFP && TARGET_VXE" +{ + rtx fprx2 = gen_reg_rtx (FPRX2mode); + emit_insn (gen_tf_to_fprx2 (fprx2, operands[1])); + emit_insn (gen_extendfprx2td2 (operands[0], fprx2)); + DONE; +}) + +(define_expand "extendtftd2" + [(match_operand:TD 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" "")] + "HAVE_TF (extendtftd2)" + { EXPAND_TF (extendtftd2, 2); }) + ; test data class (define_expand "signbittf2_vr" diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md index 816786f..3df501b 100644 --- a/gcc/config/s390/vx-builtins.md +++ b/gcc/config/s390/vx-builtins.md @@ -2312,3 +2312,58 @@ "TARGET_VXE2 && UINTVAL (operands[2]) < GET_MODE_NUNITS (<V_HW_HSD:MODE>mode)" "vstebr<bhfgq>\t%v1,%0,%2" [(set_attr "op_type" "VRX")]) + + +;; +;; NNPA Facility +;; + +(define_insn "vclfnhs_v8hi" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(vec_select:V4HI + (match_operand:V8HI 1 "register_operand" "v") + (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])) + (match_operand:QI 2 "const_mask_operand" "C")] + UNSPEC_NNPA_VCLFNHS_V8HI))] + "TARGET_NNPA" + "vclfnh\t%v0,%v1,2,%2" + [(set_attr "op_type" "VRR")]) + +(define_insn "vclfnls_v8hi" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(vec_select:V4HI + (match_operand:V8HI 1 "register_operand" "v") + (parallel [(const_int 4) (const_int 5) (const_int 6) 
(const_int 7)])) + (match_operand:QI 2 "const_mask_operand" "C")] + UNSPEC_NNPA_VCLFNLS_V8HI))] + "TARGET_NNPA" + "vclfnl\t%v0,%v1,2,%2" + [(set_attr "op_type" "VRR")]) + +(define_insn "vcrnfs_v8hi" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v") + (match_operand:QI 3 "const_mask_operand" "C")] + UNSPEC_NNPA_VCRNFS_V8HI))] + "TARGET_NNPA" + "vcrnf\t%v0,%v1,%v2,%3,2" + [(set_attr "op_type" "VRR")]) + +(define_insn "vcfn_v8hi" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:QI 2 "const_mask_operand" "C")] + UNSPEC_NNPA_VCFN_V8HI))] + "TARGET_NNPA" + "vcfn\t%v0,%v1,1,%2" + [(set_attr "op_type" "VRR")]) + +(define_insn "vcnf_v8hi" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:QI 2 "const_mask_operand" "C")] + UNSPEC_NNPA_VCNF_V8HI))] + "TARGET_NNPA" + "vcnf\t%v0,%v1,%2,1" + [(set_attr "op_type" "VRR")]) diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index f355793..f150417 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -13585,23 +13585,18 @@ sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode) emit_insn (gen_rtx_SET (operands[0], bshuf)); } -/* On sparc, any mode which naturally allocates into the float +/* On the SPARC, any mode which naturally allocates into the single float registers should return 4 here. 
*/ unsigned int sparc_regmode_natural_size (machine_mode mode) { - int size = UNITS_PER_WORD; + const enum mode_class cl = GET_MODE_CLASS (mode); - if (TARGET_ARCH64) - { - enum mode_class mclass = GET_MODE_CLASS (mode); - - if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT) - size = 4; - } + if ((cl == MODE_FLOAT || cl == MODE_VECTOR_INT) && GET_MODE_SIZE (mode) <= 4) + return 4; - return size; + return UNITS_PER_WORD; } /* Implement TARGET_HARD_REGNO_NREGS. diff --git a/gcc/config/sparc/t-sparc b/gcc/config/sparc/t-sparc index de99ce7..64906e9 100644 --- a/gcc/config/sparc/t-sparc +++ b/gcc/config/sparc/t-sparc @@ -27,3 +27,7 @@ sparc-c.o: $(srcdir)/config/sparc/sparc-c.c sparc-d.o: $(srcdir)/config/sparc/sparc-d.c $(COMPILE) $< $(POSTCOMPILE) + +# Hack around PR bootstrap/92002. +tree-ssanames.o-warn += -Wno-error=uninitialized -Wno-error=maybe-uninitialized +wide-int.o-warn += -Wno-error=uninitialized -Wno-error=maybe-uninitialized diff --git a/gcc/config/vax/vax.c b/gcc/config/vax/vax.c index fe4c14e..726c371 100644 --- a/gcc/config/vax/vax.c +++ b/gcc/config/vax/vax.c @@ -56,7 +56,7 @@ static int vax_address_cost (rtx, machine_mode, addr_space_t, bool); static bool vax_rtx_costs (rtx, machine_mode, int, int, int *, bool); static machine_mode vax_cc_modes_compatible (machine_mode, machine_mode); static rtx_insn *vax_md_asm_adjust (vec<rtx> &, vec<rtx> &, - vec<const char *> &, + vec<machine_mode> &, vec<const char *> &, vec<rtx> &, HARD_REG_SET &); static rtx vax_function_arg (cumulative_args_t, const function_arg_info &); static void vax_function_arg_advance (cumulative_args_t, @@ -1174,6 +1174,7 @@ vax_cc_modes_compatible (machine_mode m1, machine_mode m2) static rtx_insn * vax_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED, vec<rtx> &inputs ATTRIBUTE_UNUSED, + vec<machine_mode> &input_modes ATTRIBUTE_UNUSED, vec<const char *> &constraints ATTRIBUTE_UNUSED, vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) { diff --git a/gcc/config/visium/visium.c 
b/gcc/config/visium/visium.c index e0b88be..7eb2248 100644 --- a/gcc/config/visium/visium.c +++ b/gcc/config/visium/visium.c @@ -188,8 +188,9 @@ static bool visium_frame_pointer_required (void); static tree visium_build_builtin_va_list (void); static rtx_insn *visium_md_asm_adjust (vec<rtx> &, vec<rtx> &, - vec<const char *> &, - vec<rtx> &, HARD_REG_SET &); + vec<machine_mode> &, + vec<const char *> &, vec<rtx> &, + HARD_REG_SET &); static bool visium_legitimate_constant_p (machine_mode, rtx); @@ -791,9 +792,10 @@ visium_conditional_register_usage (void) the original cc0-based compiler. */ static rtx_insn * -visium_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/, - vec<const char *> &/*constraints*/, - vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) +visium_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, + vec<const char *> & /*constraints*/, vec<rtx> &clobbers, + HARD_REG_SET &clobbered_regs) { clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REGNUM)); SET_HARD_REG_BIT (clobbered_regs, FLAGS_REGNUM); |