aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2021-03-11 16:12:22 -0800
committerIan Lance Taylor <iant@golang.org>2021-03-11 16:12:22 -0800
commitbc636c218f2b28da06cd1404d5b35d1f8cc43fd1 (patch)
tree764937d8460563db6132d7c75e19b95ef3ea6ea8 /gcc/config
parent89d7be42db00cd0953e7d4584877cf50a56ed046 (diff)
parent7ad5a72c8bc6aa71a0d195ddfa207db01265fe0b (diff)
downloadgcc-bc636c218f2b28da06cd1404d5b35d1f8cc43fd1.zip
gcc-bc636c218f2b28da06cd1404d5b35d1f8cc43fd1.tar.gz
gcc-bc636c218f2b28da06cd1404d5b35d1f8cc43fd1.tar.bz2
Merge from trunk revision 7ad5a72c8bc6aa71a0d195ddfa207db01265fe0b.
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/aarch64/aarch64-simd.md23
-rw-r--r--gcc/config/aarch64/aarch64-sve-builtins.cc3
-rw-r--r--gcc/config/aarch64/aarch64-sve.md6
-rw-r--r--gcc/config/aarch64/aarch64-tuning-flags.def2
-rw-r--r--gcc/config/aarch64/aarch64.c21
-rw-r--r--gcc/config/aarch64/aarch64.md12
-rw-r--r--gcc/config/aarch64/predicates.md16
-rw-r--r--gcc/config/arc/arc.c17
-rw-r--r--gcc/config/arm/aarch-common-protos.h8
-rw-r--r--gcc/config/arm/aarch-common.c7
-rw-r--r--gcc/config/arm/arm.c14
-rw-r--r--gcc/config/arm/arm.h10
-rw-r--r--gcc/config/arm/arm.md10
-rw-r--r--gcc/config/arm/thumb2.md47
-rw-r--r--gcc/config/cris/cris.c51
-rw-r--r--gcc/config/cris/cris.h45
-rw-r--r--gcc/config/cris/cris.md38
-rw-r--r--gcc/config/darwin-protos.h1
-rw-r--r--gcc/config/darwin.c10
-rw-r--r--gcc/config/darwin.h5
-rw-r--r--gcc/config/host-darwin.c3
-rw-r--r--gcc/config/i386/constraints.md6
-rw-r--r--gcc/config/i386/i386-options.c15
-rw-r--r--gcc/config/i386/i386.c112
-rw-r--r--gcc/config/i386/i386.h273
-rw-r--r--gcc/config/i386/i386.md49
-rw-r--r--gcc/config/i386/mmx.md19
-rw-r--r--gcc/config/i386/predicates.md4
-rw-r--r--gcc/config/i386/sse.md38
-rw-r--r--gcc/config/i386/winnt.c17
-rw-r--r--gcc/config/mips/mips.c4
-rw-r--r--gcc/config/mn10300/mn10300.c7
-rw-r--r--gcc/config/nds32/nds32.c1
-rw-r--r--gcc/config/pa/pa.c504
-rw-r--r--gcc/config/pdp11/pdp11.c9
-rw-r--r--gcc/config/riscv/riscv-shorten-memrefs.c34
-rw-r--r--gcc/config/riscv/riscv.c41
-rw-r--r--gcc/config/rs6000/aix.h3
-rw-r--r--gcc/config/rs6000/dfp.md37
-rw-r--r--gcc/config/rs6000/fusion.md177
-rwxr-xr-xgcc/config/rs6000/genfusion.pl7
-rw-r--r--gcc/config/rs6000/mma.md9
-rw-r--r--gcc/config/rs6000/pcrel-opt.md401
-rw-r--r--gcc/config/rs6000/predicates.md35
-rw-r--r--gcc/config/rs6000/rs6000-builtin.def74
-rw-r--r--gcc/config/rs6000/rs6000-call.c41
-rw-r--r--gcc/config/rs6000/rs6000-cpus.def2
-rw-r--r--gcc/config/rs6000/rs6000-passes.def8
-rw-r--r--gcc/config/rs6000/rs6000-pcrel-opt.c910
-rw-r--r--gcc/config/rs6000/rs6000-protos.h4
-rw-r--r--gcc/config/rs6000/rs6000.c132
-rw-r--r--gcc/config/rs6000/rs6000.md35
-rw-r--r--gcc/config/rs6000/rs6000.opt4
-rw-r--r--gcc/config/rs6000/sync.md8
-rw-r--r--gcc/config/rs6000/t-rs60007
-rw-r--r--gcc/config/rs6000/vsx.md43
-rw-r--r--gcc/config/rs6000/xcoff.h10
-rw-r--r--gcc/config/rx/rx.h3
-rw-r--r--gcc/config/s390/driver-native.c2
-rw-r--r--gcc/config/s390/s390-builtin-types.def3
-rw-r--r--gcc/config/s390/s390-builtins.def12
-rw-r--r--gcc/config/s390/s390-c.c2
-rw-r--r--gcc/config/s390/s390-opts.h1
-rw-r--r--gcc/config/s390/s390.c116
-rw-r--r--gcc/config/s390/s390.h20
-rw-r--r--gcc/config/s390/s390.md19
-rw-r--r--gcc/config/s390/s390.opt3
-rw-r--r--gcc/config/s390/vecintrin.h6
-rw-r--r--gcc/config/s390/vector.md110
-rw-r--r--gcc/config/s390/vx-builtins.md55
-rw-r--r--gcc/config/sparc/sparc.c15
-rw-r--r--gcc/config/sparc/t-sparc4
-rw-r--r--gcc/config/vax/vax.c3
-rw-r--r--gcc/config/visium/visium.c12
74 files changed, 3063 insertions, 752 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 71aa77d..348a43d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1738,7 +1738,7 @@
(vec_concat:<VNARROWQ2>
(truncate:<VNARROWQ>
(lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "aarch64_simd_rshift_imm")))
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
@@ -1751,7 +1751,7 @@
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
(truncate:<VNARROWQ>
(lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "aarch64_simd_rshift_imm")))))]
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
@@ -1786,8 +1786,8 @@
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "aarch64_simd_rshift_imm")
- ] UNSPEC_RSHRN)
+ (match_operand:VQN 2
+ "aarch64_simd_shift_imm_vec_<vn_mode>")] UNSPEC_RSHRN)
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
@@ -1799,8 +1799,8 @@
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "aarch64_simd_rshift_imm")
- ] UNSPEC_RSHRN)))]
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+ UNSPEC_RSHRN)))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
@@ -1836,7 +1836,7 @@
(match_operand:<VNARROWQ> 1 "register_operand" "0")
(truncate:<VNARROWQ>
(lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
- (match_operand:VQN 3 "aarch64_simd_rshift_imm")))))]
+ (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
[(set_attr "type" "neon_shift_imm_narrow_q")]
@@ -1847,7 +1847,8 @@
(vec_concat:<VNARROWQ2>
(truncate:<VNARROWQ>
(lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
- (match_operand:VQN 3 "aarch64_simd_rshift_imm")))
+ (match_operand:VQN 3
+ "aarch64_simd_shift_imm_vec_<vn_mode>")))
(match_operand:<VNARROWQ> 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
@@ -1878,7 +1879,8 @@
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 1 "register_operand" "0")
(unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
- (match_operand:VQN 3 "aarch64_simd_rshift_imm")] UNSPEC_RSHRN)))]
+ (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+ UNSPEC_RSHRN)))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
[(set_attr "type" "neon_shift_imm_narrow_q")]
@@ -1888,7 +1890,8 @@
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
- (match_operand:VQN 3 "aarch64_simd_rshift_imm")] UNSPEC_RSHRN)
+ (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+ UNSPEC_RSHRN)
(match_operand:<VNARROWQ> 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 6270b51..25612d2 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1467,6 +1467,9 @@ function_resolver::require_vector_type (unsigned int argno,
{
tree expected = acle_vector_types[0][type];
tree actual = get_argument_type (argno);
+ if (actual == error_mark_node)
+ return false;
+
if (!matches_type_p (expected, actual))
{
error_at (location, "passing %qT to argument %d of %qE, which"
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 6083196..7db2938 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -4549,10 +4549,8 @@
}
else
{
- amount = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_vec_duplicate<mode> (amount,
- convert_to_mode (<VEL>mode,
- operands[2], 0)));
+ amount = convert_to_mode (<VEL>mode, operands[2], 0);
+ amount = expand_vector_broadcast (<MODE>mode, amount);
}
emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
DONE;
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index aae9952..588edf4 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -46,4 +46,6 @@ AARCH64_EXTRA_TUNING_OPTION ("no_ldp_stp_qregs", NO_LDP_STP_QREGS)
AARCH64_EXTRA_TUNING_OPTION ("rename_load_regs", RENAME_LOAD_REGS)
+AARCH64_EXTRA_TUNING_OPTION ("cse_sve_vl_constants", CSE_SVE_VL_CONSTANTS)
+
#undef AARCH64_EXTRA_TUNING_OPTION
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 146ed8c..8a86889 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1492,7 +1492,7 @@ static const struct tune_params neoversev1_tunings =
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS), /* tune_flags. */
&generic_prefetch_tune
};
@@ -12589,8 +12589,18 @@ cost_plus:
*cost += rtx_cost (op0, mode, PLUS, 0, speed);
if (speed)
- /* ADD (immediate). */
- *cost += extra_cost->alu.arith;
+ {
+ /* ADD (immediate). */
+ *cost += extra_cost->alu.arith;
+
+ /* Some tunings prefer to not use the VL-based scalar ops.
+ Increase the cost of the poly immediate to prevent their
+ formation. */
+ if (GET_CODE (op1) == CONST_POLY_INT
+ && (aarch64_tune_params.extra_tuning_flags
+ & AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS))
+ *cost += COSTS_N_INSNS (1);
+ }
return true;
}
@@ -13492,6 +13502,9 @@ aarch64_init_builtins ()
{
aarch64_general_init_builtins ();
aarch64_sve::init_builtins ();
+#ifdef SUBTARGET_INIT_BUILTINS
+ SUBTARGET_INIT_BUILTINS;
+#endif
}
/* Implement TARGET_FOLD_BUILTIN. */
@@ -17263,7 +17276,7 @@ aarch64_composite_type_p (const_tree type,
parameter passing registers are available).
Upon successful return, *COUNT returns the number of needed registers,
- *BASE_MODE returns the mode of the individual register and when IS_HAF
+ *BASE_MODE returns the mode of the individual register and when IS_HA
is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
floating-point aggregate or a homogeneous short-vector aggregate.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a482419..b2abb5b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1933,6 +1933,14 @@
&& (!REG_P (op1)
|| !REGNO_PTR_FRAME_P (REGNO (op1))))
operands[2] = force_reg (<MODE>mode, operands[2]);
+ /* Some tunings prefer to avoid VL-based operations.
+ Split off the poly immediate here. The rtx costs hook will reject attempts
+ to combine them back. */
+ else if (GET_CODE (operands[2]) == CONST_POLY_INT
+ && can_create_pseudo_p ()
+ && (aarch64_tune_params.extra_tuning_flags
+ & AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
/* Expand polynomial additions now if the destination is the stack
pointer, since we don't want to use that as a temporary. */
else if (operands[0] == stack_pointer_rtx
@@ -4421,7 +4429,7 @@
(match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
(match_operand:GPI 3 "register_operand" "r")))]
""
- "<logical>\\t%<w>0, %<w>3, %<w>1, ror (<sizen> - %2)"
+ "<logical>\\t%<w>0, %<w>3, %<w>1, ror #(<sizen> - %2)"
[(set_attr "type" "logic_shift_imm")]
)
@@ -4446,7 +4454,7 @@
(match_operand:QI 2 "aarch64_shift_imm_si" "n"))
(match_operand:SI 3 "register_operand" "r"))))]
""
- "<logical>\\t%w0, %w3, %w1, ror (32 - %2)"
+ "<logical>\\t%w0, %w3, %w1, ror #(32 - %2)"
[(set_attr "type" "logic_shift_imm")]
)
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 75612fd..c55842b 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -545,6 +545,22 @@
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 1, 64)")))
+(define_predicate "aarch64_simd_shift_imm_vec_qi"
+ (and (match_code "const_vector")
+ (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 8)")))
+
+(define_predicate "aarch64_simd_shift_imm_vec_hi"
+ (and (match_code "const_vector")
+ (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 16)")))
+
+(define_predicate "aarch64_simd_shift_imm_vec_si"
+ (and (match_code "const_vector")
+ (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 32)")))
+
+(define_predicate "aarch64_simd_shift_imm_vec_di"
+ (and (match_code "const_vector")
+ (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 64)")))
+
(define_predicate "aarch64_simd_shift_imm_bitsize_qi"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, 8)")))
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 367e4c9..3201c3f 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -10285,23 +10285,6 @@ arc_regno_use_in (unsigned int regno, rtx x)
return NULL_RTX;
}
-/* Return the integer value of the "type" attribute for INSN, or -1 if
- INSN can't have attributes. */
-
-static int
-arc_attr_type (rtx_insn *insn)
-{
- if (NONJUMP_INSN_P (insn)
- ? (GET_CODE (PATTERN (insn)) == USE
- || GET_CODE (PATTERN (insn)) == CLOBBER)
- : JUMP_P (insn)
- ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
- || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
- : !CALL_P (insn))
- return -1;
- return get_attr_type (insn);
-}
-
/* Code has a minimum p2 alignment of 1, which we must restore after
an ADDR_DIFF_VEC. */
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 7a9cf3d..b6171e8 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -144,9 +144,9 @@ struct cpu_cost_table
const struct vector_cost_table vect;
};
-rtx_insn *
-arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
- vec<const char *> &constraints,
- vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs);
+rtx_insn *arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
+ vec<machine_mode> & /*input_modes*/,
+ vec<const char *> &constraints,
+ vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs);
#endif /* GCC_AARCH_COMMON_PROTOS_H */
diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index 6ff4215..24711d5 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -531,9 +531,10 @@ arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
We implement asm flag outputs. */
rtx_insn *
-arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
- vec<const char *> &constraints,
- vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/)
+arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
+ vec<machine_mode> & /*input_modes*/,
+ vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
+ HARD_REG_SET & /*clobbered_regs*/)
{
bool saw_asm_flag = false;
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index d254f41..49635bc 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -328,9 +328,10 @@ static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
-static rtx_insn * thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
- vec<const char *> &, vec<rtx> &,
- HARD_REG_SET &);
+static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
+ vec<machine_mode> &,
+ vec<const char *> &, vec<rtx> &,
+ HARD_REG_SET &);
/* Table of machine attributes. */
static const struct attribute_spec arm_attribute_table[] =
@@ -33915,9 +33916,10 @@ arm_run_selftests (void)
Unlike the arm version, we do NOT implement asm flag outputs. */
rtx_insn *
-thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
- vec<const char *> &constraints,
- vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/)
+thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
+ vec<machine_mode> & /*input_modes*/,
+ vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
+ HARD_REG_SET & /*clobbered_regs*/)
{
for (unsigned i = 0, n = outputs.length (); i < n; ++i)
if (strncmp (constraints[i], "=@cc", 4) == 0)
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 6bc03ad..113c015 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -390,7 +390,10 @@ emission of floating point pcs attributes. */
--with-float is ignored if -mfloat-abi is specified.
--with-fpu is ignored if -mfpu is specified.
--with-abi is ignored if -mabi is specified.
- --with-tls is ignored if -mtls-dialect is specified. */
+ --with-tls is ignored if -mtls-dialect is specified.
+ Note: --with-mode is not handled here, that has a special rule
+ TARGET_MODE_CHECK that also takes into account the selected CPU and
+ architecture. */
#define OPTION_DEFAULT_SPECS \
{"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" }, \
{"cpu", "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \
@@ -398,7 +401,6 @@ emission of floating point pcs attributes. */
{"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" }, \
{"fpu", "%{!mfpu=*:-mfpu=%(VALUE)}"}, \
{"abi", "%{!mabi=*:-mabi=%(VALUE)}"}, \
- {"mode", "%{!marm:%{!mthumb:-m%(VALUE)}}"}, \
{"tls", "%{!mtls-dialect=*:-mtls-dialect=%(VALUE)}"},
extern const struct arm_fpu_desc
@@ -2424,9 +2426,9 @@ extern const char *arm_asm_auto_mfpu (int argc, const char **argv);
" mcpu=*:-mcpu=%:rewrite_mcpu(%{mcpu=*:%*})" \
" }"
-extern const char *arm_target_thumb_only (int argc, const char **argv);
+extern const char *arm_target_mode (int argc, const char **argv);
#define TARGET_MODE_SPEC_FUNCTIONS \
- { "target_mode_check", arm_target_thumb_only },
+ { "target_mode_check", arm_target_mode },
/* -mcpu=native handling only makes sense with compiler running on
an ARM chip. */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 3e441f9..45a471a 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -9216,6 +9216,11 @@
else
{
rtx mem = force_const_mem (SImode, operands[1]);
+ if (!general_operand (mem, SImode))
+ {
+ emit_move_insn (operands[2], XEXP (mem, 0));
+ mem = replace_equiv_address (mem, operands[2], false);
+ }
emit_move_insn (operands[2], mem);
}
}
@@ -9299,6 +9304,11 @@
else
{
rtx mem = force_const_mem (SImode, operands[1]);
+ if (!general_operand (mem, SImode))
+ {
+ emit_move_insn (operands[3], XEXP (mem, 0));
+ mem = replace_equiv_address (mem, operands[3], false);
+ }
emit_move_insn (operands[3], mem);
}
}
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index d7fd96c..5772f4d 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -536,19 +536,26 @@
[(set_attr "type" "call")]
)
-(define_insn "*nonsecure_call_reg_thumb2"
+(define_insn "*nonsecure_call_reg_thumb2_fpcxt"
[(call (unspec:SI [(mem:SI (match_operand:SI 0 "s_register_operand" "l*r"))]
UNSPEC_NONSECURE_MEM)
(match_operand 1 "" ""))
(use (match_operand 2 "" ""))
(clobber (reg:SI LR_REGNUM))]
- "TARGET_THUMB2 && use_cmse"
- {
- if (TARGET_HAVE_FPCXT_CMSE)
- return "blxns\\t%0";
- else
- return "bl\\t__gnu_cmse_nonsecure_call";
- }
+ "TARGET_THUMB2 && use_cmse && TARGET_HAVE_FPCXT_CMSE"
+ "blxns\\t%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "call")]
+)
+
+(define_insn "*nonsecure_call_reg_thumb2"
+ [(call (unspec:SI [(mem:SI (reg:SI R4_REGNUM))]
+ UNSPEC_NONSECURE_MEM)
+ (match_operand 0 "" ""))
+ (use (match_operand 1 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_THUMB2 && use_cmse && !TARGET_HAVE_FPCXT_CMSE"
+ "bl\\t__gnu_cmse_nonsecure_call"
[(set_attr "length" "4")
(set_attr "type" "call")]
)
@@ -564,7 +571,7 @@
[(set_attr "type" "call")]
)
-(define_insn "*nonsecure_call_value_reg_thumb2"
+(define_insn "*nonsecure_call_value_reg_thumb2_fpcxt"
[(set (match_operand 0 "" "")
(call
(unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "l*r"))]
@@ -572,13 +579,21 @@
(match_operand 2 "" "")))
(use (match_operand 3 "" ""))
(clobber (reg:SI LR_REGNUM))]
- "TARGET_THUMB2 && use_cmse"
- {
- if (TARGET_HAVE_FPCXT_CMSE)
- return "blxns\\t%1";
- else
- return "bl\\t__gnu_cmse_nonsecure_call";
- }
+ "TARGET_THUMB2 && use_cmse && TARGET_HAVE_FPCXT_CMSE"
+ "blxns\\t%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "call")]
+)
+
+(define_insn "*nonsecure_call_value_reg_thumb2"
+ [(set (match_operand 0 "" "")
+ (call
+ (unspec:SI [(mem:SI (reg:SI R4_REGNUM))] UNSPEC_NONSECURE_MEM)
+ (match_operand 1 "" "")))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_THUMB2 && use_cmse && !TARGET_HAVE_FPCXT_CMSE"
+ "bl\\t__gnu_cmse_nonsecure_call"
[(set_attr "length" "4")
(set_attr "type" "call")]
)
diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c
index 48ea855..d9213d7 100644
--- a/gcc/config/cris/cris.c
+++ b/gcc/config/cris/cris.c
@@ -150,7 +150,7 @@ static rtx cris_function_incoming_arg (cumulative_args_t,
static void cris_function_arg_advance (cumulative_args_t,
const function_arg_info &);
static rtx_insn *cris_md_asm_adjust (vec<rtx> &, vec<rtx> &,
- vec<const char *> &,
+ vec<machine_mode> &, vec<const char *> &,
vec<rtx> &, HARD_REG_SET &);
static void cris_option_override (void);
@@ -864,7 +864,7 @@ cris_reg_saved_in_regsave_area (unsigned int regno)
return
(((df_regs_ever_live_p (regno)
&& !call_used_or_fixed_reg_p (regno)))
- && (regno != FRAME_POINTER_REGNUM || !frame_pointer_needed)
+ && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)
&& regno != CRIS_SRP_REGNUM)
|| (crtl->calls_eh_return
&& (regno == EH_RETURN_DATA_REGNO (0)
@@ -880,9 +880,6 @@ cris_print_operand (FILE *file, rtx x, int code)
{
rtx operand = x;
- /* Size-strings corresponding to MULT expressions. */
- static const char *const mults[] = { "BAD:0", ".b", ".w", "BAD:3", ".d" };
-
/* New code entries should just be added to the switch below. If
handling is finished, just return. If handling was just a
modification of the operand, the modified operand should be put in
@@ -1212,11 +1209,21 @@ cris_print_operand (FILE *file, rtx x, int code)
return;
case 'T':
- /* Print the size letter for an operand to a MULT, which must be a
- const_int with a suitable value. */
- if (!CONST_INT_P (operand) || INTVAL (operand) > 4)
- LOSE_AND_RETURN ("invalid operand for 'T' modifier", x);
- fprintf (file, "%s", mults[INTVAL (operand)]);
+ {
+ /* Print the size letter for an operand to a ASHIFT, which must be a
+ const_int with a suitable value. */
+ int shiftval;
+
+ if (!CONST_INT_P (operand))
+ LOSE_AND_RETURN ("invalid operand for 'T' modifier", x);
+
+ shiftval = INTVAL (operand);
+
+ if (!(shiftval == 1 || shiftval == 2))
+ LOSE_AND_RETURN ("invalid operand for 'T' modifier", x);
+
+ fprintf (file, "%s", shiftval == 1 ? ".w" : ".d");
+ }
return;
case 0:
@@ -1438,7 +1445,7 @@ cris_initial_elimination_offset (int fromreg, int toreg)
int ap_fp_offset = 4 + (return_address_on_stack ? 4 : 0);
if (fromreg == ARG_POINTER_REGNUM
- && toreg == FRAME_POINTER_REGNUM)
+ && toreg == HARD_FRAME_POINTER_REGNUM)
return ap_fp_offset;
/* Between the frame pointer and the stack are only "normal" stack
@@ -1452,6 +1459,10 @@ cris_initial_elimination_offset (int fromreg, int toreg)
&& toreg == STACK_POINTER_REGNUM)
return ap_fp_offset + fp_sp_offset - 4;
+ if (fromreg == FRAME_POINTER_REGNUM
+ && toreg == HARD_FRAME_POINTER_REGNUM)
+ return 0;
+
gcc_unreachable ();
}
@@ -2742,10 +2753,10 @@ cris_expand_prologue (void)
mem = gen_rtx_MEM (SImode, stack_pointer_rtx);
set_mem_alias_set (mem, get_frame_alias_set ());
- insn = emit_move_insn (mem, frame_pointer_rtx);
+ insn = emit_move_insn (mem, hard_frame_pointer_rtx);
RTX_FRAME_RELATED_P (insn) = 1;
- insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
RTX_FRAME_RELATED_P (insn) = 1;
framesize += 4;
@@ -2885,8 +2896,13 @@ cris_expand_prologue (void)
framesize += size + cfoa_size;
}
+ /* FIXME: -mmax-stackframe=SIZE is obsoleted; use -Wstack-usage=SIZE
+ instead. Make it an alias? */
if (cris_max_stackframe && framesize > cris_max_stackframe)
warning (0, "stackframe too big: %d bytes", framesize);
+
+ if (flag_stack_usage_info)
+ current_function_static_stack_size = framesize;
}
/* The expander for the epilogue pattern. */
@@ -3003,11 +3019,11 @@ cris_expand_epilogue (void)
emit_insn (gen_cris_frame_deallocated_barrier ());
- emit_move_insn (stack_pointer_rtx, frame_pointer_rtx);
+ emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
mem = gen_rtx_MEM (SImode, gen_rtx_POST_INC (SImode,
stack_pointer_rtx));
set_mem_alias_set (mem, get_frame_alias_set ());
- insn = emit_move_insn (frame_pointer_rtx, mem);
+ insn = emit_move_insn (hard_frame_pointer_rtx, mem);
/* Whenever we emit insns with post-incremented addresses
ourselves, we must add a post-inc note manually. */
@@ -3489,8 +3505,9 @@ cris_function_arg_advance (cumulative_args_t ca_v,
static rtx_insn *
cris_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
- vec<const char *> &constraints,
- vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
+ vec<machine_mode> & /*input_modes*/,
+ vec<const char *> &constraints, vec<rtx> &clobbers,
+ HARD_REG_SET &clobbered_regs)
{
/* For the time being, all asms clobber condition codes.
Revisit when there's a reasonable use for inputs/outputs
diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h
index d691da9..1ab830e 100644
--- a/gcc/config/cris/cris.h
+++ b/gcc/config/cris/cris.h
@@ -352,13 +352,6 @@ extern int cris_cpu_version;
with other GNU/Linux ports (i.e. elfos.h users). */
#undef PCC_BITFIELD_TYPE_MATTERS
-/* This is only used for non-scalars. Strange stuff happens to structs
- (FIXME: What?) if we use anything larger than largest actually used
- datum size, so lets make it 32. The type "long long" will still work
- as usual. We can still have DImode insns, but they will only be used
- for scalar data (i.e. long long). */
-#define MAX_FIXED_MODE_SIZE 32
-
/* Node: Type Layout */
@@ -380,8 +373,8 @@ extern int cris_cpu_version;
/* Node: Register Basics */
/* We count all 16 non-special registers, SRP, a faked argument
- pointer register, MOF and CCR/DCCR. */
-#define FIRST_PSEUDO_REGISTER (16 + 1 + 1 + 1 + 1)
+ pointer register, MOF, CCR/DCCR, and the faked frame-pointer. */
+#define FIRST_PSEUDO_REGISTER (16 + 1 + 1 + 1 + 1 + 1)
/* For CRIS, these are r15 (pc) and r14 (sp). Register r8 is used as a
frame-pointer, but is not fixed. SRP is not included in general
@@ -389,12 +382,12 @@ extern int cris_cpu_version;
registers are fixed at the moment. The faked argument pointer register
is fixed too. */
#define FIXED_REGISTERS \
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1}
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1}
/* Register r9 is used for structure-address, r10-r13 for parameters,
r10- for return values. */
#define CALL_USED_REGISTERS \
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1}
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}
/* Node: Allocation Order */
@@ -417,7 +410,8 @@ extern int cris_cpu_version;
Use struct-return address first, since very few functions use
structure return values so it is likely to be available. */
#define REG_ALLOC_ORDER \
- {9, 13, 12, 11, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 17, 16, 18, 19}
+ {9, 13, 12, 11, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 17, 16, 18, 19, \
+ 20}
/* Node: Leaf Functions */
@@ -458,6 +452,9 @@ enum reg_class
#define CRIS_SPECIAL_REGS_CONTENTS \
((1 << CRIS_SRP_REGNUM) | (1 << CRIS_MOF_REGNUM) | (1 << CRIS_CC0_REGNUM))
+#define CRIS_FAKED_REGS_CONTENTS \
+ ((1 << CRIS_AP_REGNUM) | (1 << CRIS_FP_REGNUM))
+
/* Count in the faked argument register in GENERAL_REGS. Keep out SRP. */
#define REG_CLASS_CONTENTS \
{ \
@@ -471,13 +468,13 @@ enum reg_class
{CRIS_SPECIAL_REGS_CONTENTS}, \
{CRIS_SPECIAL_REGS_CONTENTS \
| (1 << CRIS_ACR_REGNUM)}, \
- {(0xffff | (1 << CRIS_AP_REGNUM)) \
+ {(0xffff | CRIS_FAKED_REGS_CONTENTS) \
& ~(1 << CRIS_ACR_REGNUM)}, \
- {(0xffff | (1 << CRIS_AP_REGNUM) \
+ {(0xffff | CRIS_FAKED_REGS_CONTENTS \
| CRIS_SPECIAL_REGS_CONTENTS) \
& ~(1 << CRIS_ACR_REGNUM)}, \
- {0xffff | (1 << CRIS_AP_REGNUM)}, \
- {0xffff | (1 << CRIS_AP_REGNUM) \
+ {0xffff | CRIS_FAKED_REGS_CONTENTS}, \
+ {0xffff | CRIS_FAKED_REGS_CONTENTS \
| CRIS_SPECIAL_REGS_CONTENTS} \
}
@@ -500,8 +497,10 @@ enum reg_class
allocation. */
#define REGNO_OK_FOR_BASE_P(REGNO) \
((REGNO) <= CRIS_LAST_GENERAL_REGISTER \
+ || (REGNO) == FRAME_POINTER_REGNUM \
|| (REGNO) == ARG_POINTER_REGNUM \
|| (unsigned) reg_renumber[REGNO] <= CRIS_LAST_GENERAL_REGISTER \
+ || (unsigned) reg_renumber[REGNO] == FRAME_POINTER_REGNUM \
|| (unsigned) reg_renumber[REGNO] == ARG_POINTER_REGNUM)
/* See REGNO_OK_FOR_BASE_P. */
@@ -587,6 +586,9 @@ enum reg_class
/* Register used for frame pointer. This is also the last of the saved
registers, when a frame pointer is not used. */
+#define HARD_FRAME_POINTER_REGNUM CRIS_REAL_FP_REGNUM
+
+/* Faked register, is always eliminated to at least CRIS_REAL_FP_REGNUM. */
#define FRAME_POINTER_REGNUM CRIS_FP_REGNUM
/* Faked register, is always eliminated. We need it to eliminate
@@ -595,13 +597,17 @@ enum reg_class
#define STATIC_CHAIN_REGNUM CRIS_STATIC_CHAIN_REGNUM
+/* No unwind context is needed for faked registers nor DCCR. Currently not MOF
+ too, but let's keep that open. */
+#define DWARF_FRAME_REGISTERS (CRIS_MOF_REGNUM + 1)
/* Node: Elimination */
#define ELIMINABLE_REGS \
{{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
- {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
- {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}}
+ {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}}
#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
(OFFSET) = cris_initial_elimination_offset (FROM, TO)
@@ -822,7 +828,8 @@ struct cum_args {int regs;};
#define REGISTER_NAMES \
{"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", \
- "r9", "r10", "r11", "r12", "r13", "sp", "pc", "srp", "mof", "faked_ap", "dccr"}
+ "r9", "r10", "r11", "r12", "r13", "sp", "pc", "srp", \
+ "mof", "faked_ap", "dccr", "faked_fp"}
#define ADDITIONAL_REGISTER_NAMES \
{{"r14", 14}, {"r15", 15}}
diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index 0fd29f9..7de0ec6 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -58,13 +58,14 @@
;; Register numbers.
(define_constants
[(CRIS_STATIC_CHAIN_REGNUM 7)
- (CRIS_FP_REGNUM 8)
+ (CRIS_REAL_FP_REGNUM 8)
(CRIS_SP_REGNUM 14)
(CRIS_ACR_REGNUM 15)
(CRIS_SRP_REGNUM 16)
(CRIS_MOF_REGNUM 17)
(CRIS_AP_REGNUM 18)
- (CRIS_CC0_REGNUM 19)]
+ (CRIS_CC0_REGNUM 19)
+ (CRIS_FP_REGNUM 20)]
)
;; We need an attribute to define whether an instruction can be put in
@@ -1278,18 +1279,43 @@
(define_insn "*addi"
[(set (match_operand:SI 0 "register_operand" "=r")
(plus:SI
- (mult:SI (match_operand:SI 2 "register_operand" "r")
- (match_operand:SI 3 "const_int_operand" "n"))
+ (ashift:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "n"))
(match_operand:SI 1 "register_operand" "0")))]
"operands[0] != frame_pointer_rtx
&& operands[1] != frame_pointer_rtx
&& CONST_INT_P (operands[3])
- && (INTVAL (operands[3]) == 1
- || INTVAL (operands[3]) == 2 || INTVAL (operands[3]) == 4)"
+ && (INTVAL (operands[3]) == 1 || INTVAL (operands[3]) == 2)"
"addi %2%T3,%0"
[(set_attr "slottable" "yes")
(set_attr "cc" "none")])
+;; The mult-vs-ashift canonicalization-cleanup plagues us: nothing in
+;; reload transforms a "scaled multiplication" into an ashift in a
+;; reloaded address; it's passed as-is and expected to be recognized,
+;; or else we get a tell-tale "unrecognizable insn".
+;; On top of that, we *should* match the bare insn, as a *matching
+;; pattern* (as opposed to e.g. a reload_load_address expander
+;; changing the mul into an ashift), so can_reload_into will re-use
+;; registers in the reloaded expression instead of allocating a new
+;; register.
+(define_insn_and_split "*addi_reload"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI
+ (mult:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "n"))
+ (match_operand:SI 1 "register_operand" "0")))]
+ "operands[0] != frame_pointer_rtx
+ && operands[1] != frame_pointer_rtx
+ && CONST_INT_P (operands[3])
+ && (INTVAL (operands[3]) == 2 || INTVAL (operands[3]) == 4)
+ && (reload_in_progress || reload_completed)"
+ "#"
+ ""
+ [(set (match_dup 0)
+ (plus:SI (ashift:SI (match_dup 2) (match_dup 3)) (match_dup 1)))]
+ "operands[3] = operands[3] == const2_rtx ? const1_rtx : const2_rtx;")
+
;; This pattern is usually generated after reload, so a '%' is
;; ineffective; use explicit combinations.
(define_insn "*addi_b_<mode>"
diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h
index 2120eb6..f5ef824 100644
--- a/gcc/config/darwin-protos.h
+++ b/gcc/config/darwin-protos.h
@@ -70,6 +70,7 @@ extern void darwin_non_lazy_pcrel (FILE *, rtx);
extern void darwin_emit_unwind_label (FILE *, tree, int, int);
extern void darwin_emit_except_table_label (FILE *);
extern rtx darwin_make_eh_symbol_indirect (rtx, bool);
+extern bool darwin_should_restore_cfa_state (void);
extern void darwin_pragma_ignore (struct cpp_reader *);
extern void darwin_pragma_options (struct cpp_reader *);
diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c
index 119f319..e2e60bb 100644
--- a/gcc/config/darwin.c
+++ b/gcc/config/darwin.c
@@ -2236,6 +2236,16 @@ darwin_make_eh_symbol_indirect (rtx orig, bool ARG_UNUSED (pubvis))
/*stub_p=*/false));
}
+/* The unwinders in earlier Darwin versions are based on an old version
+   of libgcc_s and need current frame address state to be reset after a
+ DW_CFA_restore_state recovers the register values. */
+
+bool
+darwin_should_restore_cfa_state (void)
+{
+ return generating_for_darwin_version <= 10;
+}
+
/* Return, and mark as used, the name of the stub for the mcount function.
Currently, this is only called by X86 code in the expansion of the
FUNCTION_PROFILER macro, when stubs are enabled. */
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 5a9fb43f..d2b2c14 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -614,6 +614,11 @@ extern GTY(()) int darwin_ms_struct;
/* Make an EH (personality or LDSA) symbol indirect as needed. */
#define TARGET_ASM_MAKE_EH_SYMBOL_INDIRECT darwin_make_eh_symbol_indirect
+/* Some of Darwin's unwinders need current frame address state to be reset
+ after a DW_CFA_restore_state recovers the register values. */
+#undef TARGET_ASM_SHOULD_RESTORE_CFA_STATE
+#define TARGET_ASM_SHOULD_RESTORE_CFA_STATE darwin_should_restore_cfa_state
+
/* Our profiling scheme doesn't use LP labels and counter words. */
#define NO_PROFILE_COUNTERS 1
diff --git a/gcc/config/host-darwin.c b/gcc/config/host-darwin.c
index 1816c61..b101fca 100644
--- a/gcc/config/host-darwin.c
+++ b/gcc/config/host-darwin.c
@@ -61,7 +61,8 @@ darwin_gt_pch_use_address (void *addr, size_t sz, int fd, size_t off)
sz = (sz + pagesize - 1) / pagesize * pagesize;
if (munmap (pch_address_space + sz, sizeof (pch_address_space) - sz) != 0)
- fatal_error (input_location, "couldn%'t unmap pch_address_space: %m");
+ fatal_error (input_location,
+		 "could not unmap %<pch_address_space%>: %m");
if (ret)
{
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 0ccefa8..a8db33e 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -110,7 +110,7 @@
;; v any EVEX encodable SSE register for AVX512VL target,
;; otherwise any SSE register
;; w any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL
-;; target.
+;; target, otherwise any SSE register.
(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
"First SSE register (@code{%xmm0}).")
@@ -148,8 +148,8 @@
"@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.")
(define_register_constraint "Yw"
- "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : NO_REGS"
- "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target.")
+ "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
+ "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target, otherwise any SSE register.")
;; We use the B prefix to denote any number of internal operands:
;; f FLAGS_REG
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index cdeabbf..e93935f 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -2159,11 +2159,11 @@ ix86_option_override_internal (bool main_args_p,
&& !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_MOVBE))
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_MOVBE;
if (((processor_alias_table[i].flags & PTA_AES) != 0)
- && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
- ix86_isa_flags |= OPTION_MASK_ISA_AES;
+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
if (((processor_alias_table[i].flags & PTA_SHA) != 0)
- && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
- ix86_isa_flags |= OPTION_MASK_ISA_SHA;
+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
if (((processor_alias_table[i].flags & PTA_PCLMUL) != 0)
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
@@ -2354,6 +2354,13 @@ ix86_option_override_internal (bool main_args_p,
if (((processor_alias_table[i].flags & PTA_PKU) != 0)
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
+ if (((processor_alias_table[i].flags & PTA_UINTR) != 0)
+ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_UINTR))
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_UINTR;
+ if (((processor_alias_table[i].flags & PTA_HRESET) != 0)
+ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_HRESET))
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_HRESET;
+
/* Don't enable x87 instructions if only general registers are
allowed by target("general-regs-only") function attribute or
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 48f9aa0..2603333 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6490,11 +6490,6 @@ ix86_compute_frame_layout (void)
offset += frame->nregs * UNITS_PER_WORD;
frame->reg_save_offset = offset;
- /* On SEH target, registers are pushed just before the frame pointer
- location. */
- if (TARGET_SEH)
- frame->hard_frame_pointer_offset = offset;
-
/* Calculate the size of the va-arg area (not including padding, if any). */
frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
@@ -6660,14 +6655,22 @@ ix86_compute_frame_layout (void)
the unwind data structure. */
if (TARGET_SEH)
{
- HOST_WIDE_INT diff;
+ /* Force the frame pointer to point at or below the lowest register save
+ area, see the SEH code in config/i386/winnt.c for the rationale. */
+ frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
- /* If we can leave the frame pointer where it is, do so. Also, returns
- the establisher frame for __builtin_frame_address (0). */
- diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
- if (diff <= SEH_MAX_FRAME_SIZE
- && (diff > 240 || (diff & 15) != 0)
- && !crtl->accesses_prior_frames)
+ /* If we can leave the frame pointer where it is, do so. Also, return
+ the establisher frame for __builtin_frame_address (0) or else if the
+ frame overflows the SEH maximum frame size. */
+ const HOST_WIDE_INT diff
+ = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
+ if (diff <= 255)
+ {
+ /* The resulting diff will be a multiple of 16 lower than 255,
+ i.e. at most 240 as required by the unwind data structure. */
+ frame->hard_frame_pointer_offset += (diff & 15);
+ }
+ else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
{
/* Ideally we'd determine what portion of the local stack frame
(within the constraint of the lowest 240) is most heavily used.
@@ -6676,6 +6679,8 @@ ix86_compute_frame_layout (void)
frame that is addressable with 8-bit offsets. */
frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
}
+ else
+ frame->hard_frame_pointer_offset = frame->hfp_save_offset;
}
}
@@ -8336,17 +8341,6 @@ ix86_expand_prologue (void)
insn = emit_insn (gen_push (hard_frame_pointer_rtx));
RTX_FRAME_RELATED_P (insn) = 1;
- /* Push registers now, before setting the frame pointer
- on SEH target. */
- if (!int_registers_saved
- && TARGET_SEH
- && !frame.save_regs_using_mov)
- {
- ix86_emit_save_regs ();
- int_registers_saved = true;
- gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
- }
-
if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
{
insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
@@ -14754,11 +14748,6 @@ distance_non_agu_define (unsigned int regno1, unsigned int regno2,
}
}
- /* get_attr_type may modify recog data. We want to make sure
- that recog data is valid for instruction INSN, on which
- distance_non_agu_define is called. INSN is unchanged here. */
- extract_insn_cached (insn);
-
if (!found)
return -1;
@@ -14928,17 +14917,15 @@ ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
return true;
}
- rtx_insn *rinsn = recog_data.insn;
+ /* Remember recog_data content. */
+ struct recog_data_d recog_data_save = recog_data;
dist_define = distance_non_agu_define (regno1, regno2, insn);
dist_use = distance_agu_use (regno0, insn);
- /* distance_non_agu_define can call extract_insn_cached. If this function
- is called from define_split conditions, that can break insn splitting,
- because split_insns works by clearing recog_data.insn and then modifying
- recog_data.operand array and match the various split conditions. */
- if (recog_data.insn != rinsn)
- recog_data.insn = NULL;
+ /* distance_non_agu_define can call get_attr_type which can call
+ recog_memoized, restore recog_data back to previous content. */
+ recog_data = recog_data_save;
if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
{
@@ -14968,38 +14955,6 @@ ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
return dist_define >= dist_use;
}
-/* Return true if it is legal to clobber flags by INSN and
- false otherwise. */
-
-static bool
-ix86_ok_to_clobber_flags (rtx_insn *insn)
-{
- basic_block bb = BLOCK_FOR_INSN (insn);
- df_ref use;
- bitmap live;
-
- while (insn)
- {
- if (NONDEBUG_INSN_P (insn))
- {
- FOR_EACH_INSN_USE (use, insn)
- if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
- return false;
-
- if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
- return true;
- }
-
- if (insn == BB_END (bb))
- break;
-
- insn = NEXT_INSN (insn);
- }
-
- live = df_get_live_out(bb);
- return !REGNO_REG_SET_P (live, FLAGS_REG);
-}
-
/* Return true if we need to split op0 = op1 + op2 into a sequence of
move and add to avoid AGU stalls. */
@@ -15012,10 +14967,6 @@ ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
- /* Check it is correct to split here. */
- if (!ix86_ok_to_clobber_flags(insn))
- return false;
-
regno0 = true_regnum (operands[0]);
regno1 = true_regnum (operands[1]);
regno2 = true_regnum (operands[2]);
@@ -15051,7 +15002,7 @@ ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
}
/* Return true if we need to split lea into a sequence of
- instructions to avoid AGU stalls. */
+ instructions to avoid AGU stalls during peephole2. */
bool
ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
@@ -15071,10 +15022,6 @@ ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
&& REG_P (XEXP (operands[1], 0))))
return false;
- /* Check if it is OK to split here. */
- if (!ix86_ok_to_clobber_flags (insn))
- return false;
-
ok = ix86_decompose_address (operands[1], &parts);
gcc_assert (ok);
@@ -21426,9 +21373,10 @@ ix86_c_mode_for_suffix (char suffix)
with the old cc0-based compiler. */
static rtx_insn *
-ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
- vec<const char *> &constraints,
- vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
+ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
+ vec<machine_mode> & /*input_modes*/,
+ vec<const char *> &constraints, vec<rtx> &clobbers,
+ HARD_REG_SET &clobbered_regs)
{
bool saw_asm_flag = false;
@@ -22657,15 +22605,15 @@ ix86_simd_clone_usable (struct cgraph_node *node)
return -1;
if (!TARGET_AVX)
return 0;
- return TARGET_AVX2 ? 2 : 1;
+ return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
case 'c':
if (!TARGET_AVX)
return -1;
- return TARGET_AVX2 ? 1 : 0;
+ return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
case 'd':
if (!TARGET_AVX2)
return -1;
- return 0;
+ return TARGET_AVX512F ? 1 : 0;
case 'e':
if (!TARGET_AVX512F)
return -1;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 272b195..69fddca 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1163,6 +1163,22 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
/* k0, k1, k2, k3, k4, k5, k6, k7*/ \
1, 1, 1, 1, 1, 1, 1, 1 }
+/* Order in which to allocate registers. Each register must be
+ listed once, even those in FIXED_REGISTERS. List frame pointer
+ late and fixed registers last. Note that, in general, we prefer
+ registers listed in CALL_USED_REGISTERS, keeping the others
+ available for storage of persistent values.
+
+   The ADJUST_REG_ALLOC_ORDER actually overwrites the order,
+   so this is just an empty initializer for the array.  */
+
+#define REG_ALLOC_ORDER \
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, \
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, \
+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75 }
+
/* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order
to be rearranged based on a particular function. When using sse math,
we want to allocate SSE before x87 registers and vice versa. */
@@ -2391,157 +2407,160 @@ extern const char *const processor_names[];
#include "wide-int-bitmask.h"
-const wide_int_bitmask PTA_3DNOW (HOST_WIDE_INT_1U << 0);
-const wide_int_bitmask PTA_3DNOW_A (HOST_WIDE_INT_1U << 1);
-const wide_int_bitmask PTA_64BIT (HOST_WIDE_INT_1U << 2);
-const wide_int_bitmask PTA_ABM (HOST_WIDE_INT_1U << 3);
-const wide_int_bitmask PTA_AES (HOST_WIDE_INT_1U << 4);
-const wide_int_bitmask PTA_AVX (HOST_WIDE_INT_1U << 5);
-const wide_int_bitmask PTA_BMI (HOST_WIDE_INT_1U << 6);
-const wide_int_bitmask PTA_CX16 (HOST_WIDE_INT_1U << 7);
-const wide_int_bitmask PTA_F16C (HOST_WIDE_INT_1U << 8);
-const wide_int_bitmask PTA_FMA (HOST_WIDE_INT_1U << 9);
-const wide_int_bitmask PTA_FMA4 (HOST_WIDE_INT_1U << 10);
-const wide_int_bitmask PTA_FSGSBASE (HOST_WIDE_INT_1U << 11);
-const wide_int_bitmask PTA_LWP (HOST_WIDE_INT_1U << 12);
-const wide_int_bitmask PTA_LZCNT (HOST_WIDE_INT_1U << 13);
-const wide_int_bitmask PTA_MMX (HOST_WIDE_INT_1U << 14);
-const wide_int_bitmask PTA_MOVBE (HOST_WIDE_INT_1U << 15);
-const wide_int_bitmask PTA_NO_SAHF (HOST_WIDE_INT_1U << 16);
-const wide_int_bitmask PTA_PCLMUL (HOST_WIDE_INT_1U << 17);
-const wide_int_bitmask PTA_POPCNT (HOST_WIDE_INT_1U << 18);
-const wide_int_bitmask PTA_PREFETCH_SSE (HOST_WIDE_INT_1U << 19);
-const wide_int_bitmask PTA_RDRND (HOST_WIDE_INT_1U << 20);
-const wide_int_bitmask PTA_SSE (HOST_WIDE_INT_1U << 21);
-const wide_int_bitmask PTA_SSE2 (HOST_WIDE_INT_1U << 22);
-const wide_int_bitmask PTA_SSE3 (HOST_WIDE_INT_1U << 23);
-const wide_int_bitmask PTA_SSE4_1 (HOST_WIDE_INT_1U << 24);
-const wide_int_bitmask PTA_SSE4_2 (HOST_WIDE_INT_1U << 25);
-const wide_int_bitmask PTA_SSE4A (HOST_WIDE_INT_1U << 26);
-const wide_int_bitmask PTA_SSSE3 (HOST_WIDE_INT_1U << 27);
-const wide_int_bitmask PTA_TBM (HOST_WIDE_INT_1U << 28);
-const wide_int_bitmask PTA_XOP (HOST_WIDE_INT_1U << 29);
-const wide_int_bitmask PTA_AVX2 (HOST_WIDE_INT_1U << 30);
-const wide_int_bitmask PTA_BMI2 (HOST_WIDE_INT_1U << 31);
-const wide_int_bitmask PTA_RTM (HOST_WIDE_INT_1U << 32);
-const wide_int_bitmask PTA_HLE (HOST_WIDE_INT_1U << 33);
-const wide_int_bitmask PTA_PRFCHW (HOST_WIDE_INT_1U << 34);
-const wide_int_bitmask PTA_RDSEED (HOST_WIDE_INT_1U << 35);
-const wide_int_bitmask PTA_ADX (HOST_WIDE_INT_1U << 36);
-const wide_int_bitmask PTA_FXSR (HOST_WIDE_INT_1U << 37);
-const wide_int_bitmask PTA_XSAVE (HOST_WIDE_INT_1U << 38);
-const wide_int_bitmask PTA_XSAVEOPT (HOST_WIDE_INT_1U << 39);
-const wide_int_bitmask PTA_AVX512F (HOST_WIDE_INT_1U << 40);
-const wide_int_bitmask PTA_AVX512ER (HOST_WIDE_INT_1U << 41);
-const wide_int_bitmask PTA_AVX512PF (HOST_WIDE_INT_1U << 42);
-const wide_int_bitmask PTA_AVX512CD (HOST_WIDE_INT_1U << 43);
-const wide_int_bitmask PTA_NO_TUNE (HOST_WIDE_INT_1U << 44);
-const wide_int_bitmask PTA_SHA (HOST_WIDE_INT_1U << 45);
-const wide_int_bitmask PTA_PREFETCHWT1 (HOST_WIDE_INT_1U << 46);
-const wide_int_bitmask PTA_CLFLUSHOPT (HOST_WIDE_INT_1U << 47);
-const wide_int_bitmask PTA_XSAVEC (HOST_WIDE_INT_1U << 48);
-const wide_int_bitmask PTA_XSAVES (HOST_WIDE_INT_1U << 49);
-const wide_int_bitmask PTA_AVX512DQ (HOST_WIDE_INT_1U << 50);
-const wide_int_bitmask PTA_AVX512BW (HOST_WIDE_INT_1U << 51);
-const wide_int_bitmask PTA_AVX512VL (HOST_WIDE_INT_1U << 52);
-const wide_int_bitmask PTA_AVX512IFMA (HOST_WIDE_INT_1U << 53);
-const wide_int_bitmask PTA_AVX512VBMI (HOST_WIDE_INT_1U << 54);
-const wide_int_bitmask PTA_CLWB (HOST_WIDE_INT_1U << 55);
-const wide_int_bitmask PTA_MWAITX (HOST_WIDE_INT_1U << 56);
-const wide_int_bitmask PTA_CLZERO (HOST_WIDE_INT_1U << 57);
-const wide_int_bitmask PTA_NO_80387 (HOST_WIDE_INT_1U << 58);
-const wide_int_bitmask PTA_PKU (HOST_WIDE_INT_1U << 59);
-const wide_int_bitmask PTA_AVX5124VNNIW (HOST_WIDE_INT_1U << 60);
-const wide_int_bitmask PTA_AVX5124FMAPS (HOST_WIDE_INT_1U << 61);
-const wide_int_bitmask PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1U << 62);
-const wide_int_bitmask PTA_SGX (HOST_WIDE_INT_1U << 63);
-const wide_int_bitmask PTA_AVX512VNNI (0, HOST_WIDE_INT_1U);
-const wide_int_bitmask PTA_GFNI (0, HOST_WIDE_INT_1U << 1);
-const wide_int_bitmask PTA_VAES (0, HOST_WIDE_INT_1U << 2);
-const wide_int_bitmask PTA_AVX512VBMI2 (0, HOST_WIDE_INT_1U << 3);
-const wide_int_bitmask PTA_VPCLMULQDQ (0, HOST_WIDE_INT_1U << 4);
-const wide_int_bitmask PTA_AVX512BITALG (0, HOST_WIDE_INT_1U << 5);
-const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6);
-const wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7);
-const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8);
-const wide_int_bitmask PTA_AVX512VP2INTERSECT (0, HOST_WIDE_INT_1U << 9);
-const wide_int_bitmask PTA_PTWRITE (0, HOST_WIDE_INT_1U << 10);
-const wide_int_bitmask PTA_AVX512BF16 (0, HOST_WIDE_INT_1U << 11);
-const wide_int_bitmask PTA_WAITPKG (0, HOST_WIDE_INT_1U << 12);
-const wide_int_bitmask PTA_MOVDIRI (0, HOST_WIDE_INT_1U << 13);
-const wide_int_bitmask PTA_MOVDIR64B (0, HOST_WIDE_INT_1U << 14);
-const wide_int_bitmask PTA_ENQCMD (0, HOST_WIDE_INT_1U << 15);
-const wide_int_bitmask PTA_CLDEMOTE (0, HOST_WIDE_INT_1U << 16);
-const wide_int_bitmask PTA_SERIALIZE (0, HOST_WIDE_INT_1U << 17);
-const wide_int_bitmask PTA_TSXLDTRK (0, HOST_WIDE_INT_1U << 18);
-const wide_int_bitmask PTA_AMX_TILE (0, HOST_WIDE_INT_1U << 19);
-const wide_int_bitmask PTA_AMX_INT8 (0, HOST_WIDE_INT_1U << 20);
-const wide_int_bitmask PTA_AMX_BF16 (0, HOST_WIDE_INT_1U << 21);
-const wide_int_bitmask PTA_UINTR (0, HOST_WIDE_INT_1U << 22);
-const wide_int_bitmask PTA_HRESET (0, HOST_WIDE_INT_1U << 23);
-const wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24);
-const wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25);
-const wide_int_bitmask PTA_AVXVNNI (0, HOST_WIDE_INT_1U << 26);
-
-const wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE
+constexpr wide_int_bitmask PTA_3DNOW (HOST_WIDE_INT_1U << 0);
+constexpr wide_int_bitmask PTA_3DNOW_A (HOST_WIDE_INT_1U << 1);
+constexpr wide_int_bitmask PTA_64BIT (HOST_WIDE_INT_1U << 2);
+constexpr wide_int_bitmask PTA_ABM (HOST_WIDE_INT_1U << 3);
+constexpr wide_int_bitmask PTA_AES (HOST_WIDE_INT_1U << 4);
+constexpr wide_int_bitmask PTA_AVX (HOST_WIDE_INT_1U << 5);
+constexpr wide_int_bitmask PTA_BMI (HOST_WIDE_INT_1U << 6);
+constexpr wide_int_bitmask PTA_CX16 (HOST_WIDE_INT_1U << 7);
+constexpr wide_int_bitmask PTA_F16C (HOST_WIDE_INT_1U << 8);
+constexpr wide_int_bitmask PTA_FMA (HOST_WIDE_INT_1U << 9);
+constexpr wide_int_bitmask PTA_FMA4 (HOST_WIDE_INT_1U << 10);
+constexpr wide_int_bitmask PTA_FSGSBASE (HOST_WIDE_INT_1U << 11);
+constexpr wide_int_bitmask PTA_LWP (HOST_WIDE_INT_1U << 12);
+constexpr wide_int_bitmask PTA_LZCNT (HOST_WIDE_INT_1U << 13);
+constexpr wide_int_bitmask PTA_MMX (HOST_WIDE_INT_1U << 14);
+constexpr wide_int_bitmask PTA_MOVBE (HOST_WIDE_INT_1U << 15);
+constexpr wide_int_bitmask PTA_NO_SAHF (HOST_WIDE_INT_1U << 16);
+constexpr wide_int_bitmask PTA_PCLMUL (HOST_WIDE_INT_1U << 17);
+constexpr wide_int_bitmask PTA_POPCNT (HOST_WIDE_INT_1U << 18);
+constexpr wide_int_bitmask PTA_PREFETCH_SSE (HOST_WIDE_INT_1U << 19);
+constexpr wide_int_bitmask PTA_RDRND (HOST_WIDE_INT_1U << 20);
+constexpr wide_int_bitmask PTA_SSE (HOST_WIDE_INT_1U << 21);
+constexpr wide_int_bitmask PTA_SSE2 (HOST_WIDE_INT_1U << 22);
+constexpr wide_int_bitmask PTA_SSE3 (HOST_WIDE_INT_1U << 23);
+constexpr wide_int_bitmask PTA_SSE4_1 (HOST_WIDE_INT_1U << 24);
+constexpr wide_int_bitmask PTA_SSE4_2 (HOST_WIDE_INT_1U << 25);
+constexpr wide_int_bitmask PTA_SSE4A (HOST_WIDE_INT_1U << 26);
+constexpr wide_int_bitmask PTA_SSSE3 (HOST_WIDE_INT_1U << 27);
+constexpr wide_int_bitmask PTA_TBM (HOST_WIDE_INT_1U << 28);
+constexpr wide_int_bitmask PTA_XOP (HOST_WIDE_INT_1U << 29);
+constexpr wide_int_bitmask PTA_AVX2 (HOST_WIDE_INT_1U << 30);
+constexpr wide_int_bitmask PTA_BMI2 (HOST_WIDE_INT_1U << 31);
+constexpr wide_int_bitmask PTA_RTM (HOST_WIDE_INT_1U << 32);
+constexpr wide_int_bitmask PTA_HLE (HOST_WIDE_INT_1U << 33);
+constexpr wide_int_bitmask PTA_PRFCHW (HOST_WIDE_INT_1U << 34);
+constexpr wide_int_bitmask PTA_RDSEED (HOST_WIDE_INT_1U << 35);
+constexpr wide_int_bitmask PTA_ADX (HOST_WIDE_INT_1U << 36);
+constexpr wide_int_bitmask PTA_FXSR (HOST_WIDE_INT_1U << 37);
+constexpr wide_int_bitmask PTA_XSAVE (HOST_WIDE_INT_1U << 38);
+constexpr wide_int_bitmask PTA_XSAVEOPT (HOST_WIDE_INT_1U << 39);
+constexpr wide_int_bitmask PTA_AVX512F (HOST_WIDE_INT_1U << 40);
+constexpr wide_int_bitmask PTA_AVX512ER (HOST_WIDE_INT_1U << 41);
+constexpr wide_int_bitmask PTA_AVX512PF (HOST_WIDE_INT_1U << 42);
+constexpr wide_int_bitmask PTA_AVX512CD (HOST_WIDE_INT_1U << 43);
+constexpr wide_int_bitmask PTA_NO_TUNE (HOST_WIDE_INT_1U << 44);
+constexpr wide_int_bitmask PTA_SHA (HOST_WIDE_INT_1U << 45);
+constexpr wide_int_bitmask PTA_PREFETCHWT1 (HOST_WIDE_INT_1U << 46);
+constexpr wide_int_bitmask PTA_CLFLUSHOPT (HOST_WIDE_INT_1U << 47);
+constexpr wide_int_bitmask PTA_XSAVEC (HOST_WIDE_INT_1U << 48);
+constexpr wide_int_bitmask PTA_XSAVES (HOST_WIDE_INT_1U << 49);
+constexpr wide_int_bitmask PTA_AVX512DQ (HOST_WIDE_INT_1U << 50);
+constexpr wide_int_bitmask PTA_AVX512BW (HOST_WIDE_INT_1U << 51);
+constexpr wide_int_bitmask PTA_AVX512VL (HOST_WIDE_INT_1U << 52);
+constexpr wide_int_bitmask PTA_AVX512IFMA (HOST_WIDE_INT_1U << 53);
+constexpr wide_int_bitmask PTA_AVX512VBMI (HOST_WIDE_INT_1U << 54);
+constexpr wide_int_bitmask PTA_CLWB (HOST_WIDE_INT_1U << 55);
+constexpr wide_int_bitmask PTA_MWAITX (HOST_WIDE_INT_1U << 56);
+constexpr wide_int_bitmask PTA_CLZERO (HOST_WIDE_INT_1U << 57);
+constexpr wide_int_bitmask PTA_NO_80387 (HOST_WIDE_INT_1U << 58);
+constexpr wide_int_bitmask PTA_PKU (HOST_WIDE_INT_1U << 59);
+constexpr wide_int_bitmask PTA_AVX5124VNNIW (HOST_WIDE_INT_1U << 60);
+constexpr wide_int_bitmask PTA_AVX5124FMAPS (HOST_WIDE_INT_1U << 61);
+constexpr wide_int_bitmask PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1U << 62);
+constexpr wide_int_bitmask PTA_SGX (HOST_WIDE_INT_1U << 63);
+constexpr wide_int_bitmask PTA_AVX512VNNI (0, HOST_WIDE_INT_1U);
+constexpr wide_int_bitmask PTA_GFNI (0, HOST_WIDE_INT_1U << 1);
+constexpr wide_int_bitmask PTA_VAES (0, HOST_WIDE_INT_1U << 2);
+constexpr wide_int_bitmask PTA_AVX512VBMI2 (0, HOST_WIDE_INT_1U << 3);
+constexpr wide_int_bitmask PTA_VPCLMULQDQ (0, HOST_WIDE_INT_1U << 4);
+constexpr wide_int_bitmask PTA_AVX512BITALG (0, HOST_WIDE_INT_1U << 5);
+constexpr wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6);
+constexpr wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7);
+constexpr wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8);
+constexpr wide_int_bitmask PTA_AVX512VP2INTERSECT (0, HOST_WIDE_INT_1U << 9);
+constexpr wide_int_bitmask PTA_PTWRITE (0, HOST_WIDE_INT_1U << 10);
+constexpr wide_int_bitmask PTA_AVX512BF16 (0, HOST_WIDE_INT_1U << 11);
+constexpr wide_int_bitmask PTA_WAITPKG (0, HOST_WIDE_INT_1U << 12);
+constexpr wide_int_bitmask PTA_MOVDIRI (0, HOST_WIDE_INT_1U << 13);
+constexpr wide_int_bitmask PTA_MOVDIR64B (0, HOST_WIDE_INT_1U << 14);
+constexpr wide_int_bitmask PTA_ENQCMD (0, HOST_WIDE_INT_1U << 15);
+constexpr wide_int_bitmask PTA_CLDEMOTE (0, HOST_WIDE_INT_1U << 16);
+constexpr wide_int_bitmask PTA_SERIALIZE (0, HOST_WIDE_INT_1U << 17);
+constexpr wide_int_bitmask PTA_TSXLDTRK (0, HOST_WIDE_INT_1U << 18);
+constexpr wide_int_bitmask PTA_AMX_TILE (0, HOST_WIDE_INT_1U << 19);
+constexpr wide_int_bitmask PTA_AMX_INT8 (0, HOST_WIDE_INT_1U << 20);
+constexpr wide_int_bitmask PTA_AMX_BF16 (0, HOST_WIDE_INT_1U << 21);
+constexpr wide_int_bitmask PTA_UINTR (0, HOST_WIDE_INT_1U << 22);
+constexpr wide_int_bitmask PTA_HRESET (0, HOST_WIDE_INT_1U << 23);
+constexpr wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24);
+constexpr wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25);
+constexpr wide_int_bitmask PTA_AVXVNNI (0, HOST_WIDE_INT_1U << 26);
+
+constexpr wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE
| PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR;
-const wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE & (~PTA_NO_SAHF))
+constexpr wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE
+ & (~PTA_NO_SAHF))
| PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_SSSE3;
-const wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2
+constexpr wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2
| PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
| PTA_MOVBE | PTA_XSAVE;
-const wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3
+constexpr wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3
| PTA_AVX512F | PTA_AVX512BW | PTA_AVX512CD | PTA_AVX512DQ | PTA_AVX512VL;
-const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
+constexpr wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
| PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
-const wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2
+constexpr wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2
| PTA_POPCNT;
-const wide_int_bitmask PTA_WESTMERE = PTA_NEHALEM | PTA_PCLMUL;
-const wide_int_bitmask PTA_SANDYBRIDGE = PTA_WESTMERE | PTA_AVX | PTA_XSAVE
+constexpr wide_int_bitmask PTA_WESTMERE = PTA_NEHALEM | PTA_PCLMUL;
+constexpr wide_int_bitmask PTA_SANDYBRIDGE = PTA_WESTMERE | PTA_AVX | PTA_XSAVE
| PTA_XSAVEOPT;
-const wide_int_bitmask PTA_IVYBRIDGE = PTA_SANDYBRIDGE | PTA_FSGSBASE
+constexpr wide_int_bitmask PTA_IVYBRIDGE = PTA_SANDYBRIDGE | PTA_FSGSBASE
| PTA_RDRND | PTA_F16C;
-const wide_int_bitmask PTA_HASWELL = PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI
+constexpr wide_int_bitmask PTA_HASWELL = PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI
| PTA_BMI2 | PTA_LZCNT | PTA_FMA | PTA_MOVBE | PTA_HLE;
-const wide_int_bitmask PTA_BROADWELL = PTA_HASWELL | PTA_ADX | PTA_RDSEED
+constexpr wide_int_bitmask PTA_BROADWELL = PTA_HASWELL | PTA_ADX | PTA_RDSEED
| PTA_PRFCHW;
-const wide_int_bitmask PTA_SKYLAKE = PTA_BROADWELL | PTA_AES | PTA_CLFLUSHOPT
- | PTA_XSAVEC | PTA_XSAVES | PTA_SGX;
-const wide_int_bitmask PTA_SKYLAKE_AVX512 = PTA_SKYLAKE | PTA_AVX512F
+constexpr wide_int_bitmask PTA_SKYLAKE = PTA_BROADWELL | PTA_AES
+ | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SGX;
+constexpr wide_int_bitmask PTA_SKYLAKE_AVX512 = PTA_SKYLAKE | PTA_AVX512F
| PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
| PTA_CLWB;
-const wide_int_bitmask PTA_CASCADELAKE = PTA_SKYLAKE_AVX512 | PTA_AVX512VNNI;
-const wide_int_bitmask PTA_COOPERLAKE = PTA_CASCADELAKE | PTA_AVX512BF16;
-const wide_int_bitmask PTA_CANNONLAKE = PTA_SKYLAKE | PTA_AVX512F
+constexpr wide_int_bitmask PTA_CASCADELAKE = PTA_SKYLAKE_AVX512
+ | PTA_AVX512VNNI;
+constexpr wide_int_bitmask PTA_COOPERLAKE = PTA_CASCADELAKE | PTA_AVX512BF16;
+constexpr wide_int_bitmask PTA_CANNONLAKE = PTA_SKYLAKE | PTA_AVX512F
| PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
| PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA;
-const wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI
+constexpr wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI
| PTA_GFNI | PTA_VAES | PTA_AVX512VBMI2 | PTA_VPCLMULQDQ | PTA_AVX512BITALG
| PTA_RDPID | PTA_AVX512VPOPCNTDQ;
-const wide_int_bitmask PTA_ICELAKE_SERVER = PTA_ICELAKE_CLIENT | PTA_PCONFIG
- | PTA_WBNOINVD | PTA_CLWB;
-const wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI
+constexpr wide_int_bitmask PTA_ICELAKE_SERVER = PTA_ICELAKE_CLIENT
+ | PTA_PCONFIG | PTA_WBNOINVD | PTA_CLWB;
+constexpr wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI
| PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT | PTA_KL | PTA_WIDEKL;
-const wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI
+constexpr wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI
| PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE
| PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE
| PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI;
-const wide_int_bitmask PTA_ALDERLAKE = PTA_SKYLAKE | PTA_CLDEMOTE | PTA_PTWRITE
- | PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
-const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER
- | PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1;
-const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE;
-const wide_int_bitmask PTA_SILVERMONT = PTA_WESTMERE | PTA_MOVBE | PTA_RDRND
- | PTA_PRFCHW;
-const wide_int_bitmask PTA_GOLDMONT = PTA_SILVERMONT | PTA_AES | PTA_SHA | PTA_XSAVE
- | PTA_RDSEED | PTA_XSAVEC | PTA_XSAVES | PTA_CLFLUSHOPT | PTA_XSAVEOPT
- | PTA_FSGSBASE;
-const wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID
+constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_SKYLAKE | PTA_CLDEMOTE
+ | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET | PTA_KL
+ | PTA_WIDEKL | PTA_AVXVNNI;
+constexpr wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF
+ | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1;
+constexpr wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE;
+constexpr wide_int_bitmask PTA_SILVERMONT = PTA_WESTMERE | PTA_MOVBE
+ | PTA_RDRND | PTA_PRFCHW;
+constexpr wide_int_bitmask PTA_GOLDMONT = PTA_SILVERMONT | PTA_AES | PTA_SHA
+ | PTA_XSAVE | PTA_RDSEED | PTA_XSAVEC | PTA_XSAVES | PTA_CLFLUSHOPT
+ | PTA_XSAVEOPT | PTA_FSGSBASE;
+constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID
| PTA_SGX | PTA_PTWRITE;
-const wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB
+constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB
| PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG;
-const wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
+constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
| PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
#ifndef GENERATOR_FILE
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b60784a..2820f6d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -5176,7 +5176,7 @@
;; Load effective address instructions
-(define_insn_and_split "*lea<mode>"
+(define_insn "*lea<mode>"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
"ix86_hardreg_mov_ok (operands[0], operands[1])"
@@ -5189,38 +5189,36 @@
else
return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
}
- "reload_completed && ix86_avoid_lea_for_addr (insn, operands)"
+ [(set_attr "type" "lea")
+ (set (attr "mode")
+ (if_then_else
+ (match_operand 1 "SImode_address_operand")
+ (const_string "SI")
+ (const_string "<MODE>")))])
+
+(define_peephole2
+ [(set (match_operand:SWI48 0 "register_operand")
+ (match_operand:SWI48 1 "address_no_seg_operand"))]
+ "ix86_hardreg_mov_ok (operands[0], operands[1])
+ && peep2_regno_dead_p (0, FLAGS_REG)
+ && ix86_avoid_lea_for_addr (peep2_next_insn (0), operands)"
[(const_int 0)]
{
machine_mode mode = <MODE>mode;
- rtx pat;
-
- /* ix86_avoid_lea_for_addr re-recognizes insn and may
- change operands[] array behind our back. */
- pat = PATTERN (curr_insn);
-
- operands[0] = SET_DEST (pat);
- operands[1] = SET_SRC (pat);
/* Emit all operations in SImode for zero-extended addresses. */
if (SImode_address_operand (operands[1], VOIDmode))
mode = SImode;
- ix86_split_lea_for_addr (curr_insn, operands, mode);
+ ix86_split_lea_for_addr (peep2_next_insn (0), operands, mode);
/* Zero-extend return register to DImode for zero-extended addresses. */
if (mode != <MODE>mode)
- emit_insn (gen_zero_extendsidi2
- (operands[0], gen_lowpart (mode, operands[0])));
+ emit_insn (gen_zero_extendsidi2 (operands[0],
+ gen_lowpart (mode, operands[0])));
DONE;
-}
- [(set_attr "type" "lea")
- (set (attr "mode")
- (if_then_else
- (match_operand 1 "SImode_address_operand")
- (const_string "SI")
- (const_string "<MODE>")))])
+})
;; Add instructions
@@ -19845,7 +19843,16 @@
(match_operator 3 "commutative_operator"
[(match_dup 0)
(match_operand 2 "memory_operand")]))]
- "REGNO (operands[0]) != REGNO (operands[1])"
+ "REGNO (operands[0]) != REGNO (operands[1])
+ /* Punt if operands[1] is %[xy]mm16+ and AVX512BW is not enabled,
+ as EVEX encoded vpadd[bw], vpmullw, vpmin[su][bw] and vpmax[su][bw]
+ instructions require AVX512BW and AVX512VL, but with the original
+ instructions it might require just AVX512VL.
+ AVX512VL is implied from TARGET_HARD_REGNO_MODE_OK. */
+ && (!EXT_REX_SSE_REGNO_P (REGNO (operands[1]))
+ || TARGET_AVX512BW
+ || GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (operands[0]))) > 2
+ || logic_operator (operands[3], VOIDmode))"
[(set (match_dup 0) (match_dup 2))
(set (match_dup 0)
(match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9e5a4d1..c6a2882 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2021,9 +2021,9 @@
})
(define_insn "mmx_pshufw_1"
- [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
(vec_select:V4HI
- (match_operand:V4HI 1 "register_mmxmem_operand" "ym,xYw")
+ (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yw")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -2076,6 +2076,17 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
+;; Optimize V2SImode load from memory, swapping the elements and
+;; storing back into the memory into DImode rotate of the memory by 32.
+(define_split
+ [(set (match_operand:V2SI 0 "memory_operand")
+ (vec_select:V2SI (match_dup 0)
+ (parallel [(const_int 1) (const_int 0)])))]
+ "TARGET_64BIT && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
+ [(set (match_dup 0)
+ (rotate:DI (match_dup 0) (const_int 32)))]
+ "operands[0] = adjust_address (operands[0], DImode, 0);")
+
(define_insn "mmx_pswapdv2si2"
[(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
(vec_select:V2SI
@@ -2094,10 +2105,10 @@
(set_attr "mode" "DI,TI")])
(define_insn "*vec_dupv4hi"
- [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
(vec_duplicate:V4HI
(truncate:HI
- (match_operand:SI 1 "register_operand" "0,xYw"))))]
+ (match_operand:SI 1 "register_operand" "0,Yw"))))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
"@
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index ee42ba2..b6dd5e9 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1486,6 +1486,10 @@
(define_predicate "div_operator"
(match_code "div"))
+;; Return true if this is a and, ior or xor operation.
+(define_predicate "logic_operator"
+ (match_code "and,ior,xor"))
+
;; Return true if this is a plus, minus, and, ior or xor operation.
(define_predicate "plusminuslogic_operator"
(match_code "plus,minus,and,ior,xor"))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index db5be59..ca4372d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -560,6 +560,14 @@
(V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
(V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
+(define_mode_attr v_Yw
+ [(V16QI "Yw") (V32QI "Yw") (V64QI "v")
+ (V8HI "Yw") (V16HI "Yw") (V32HI "v")
+ (V4SI "v") (V8SI "v") (V16SI "v")
+ (V2DI "v") (V4DI "v") (V8DI "v")
+ (V4SF "v") (V8SF "v") (V16SF "v")
+ (V2DF "v") (V4DF "v") (V8DF "v")])
+
(define_mode_attr sse2_avx_avx512f
[(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
(V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
@@ -11677,10 +11685,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<insn><mode>3"
- [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
+ [(set (match_operand:VI_AVX2 0 "register_operand" "=x,<v_Yw>")
(plusminus:VI_AVX2
- (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v")
- (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))]
+ (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,<v_Yw>")
+ (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,<v_Yw>mBr")))]
"TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -11790,9 +11798,9 @@
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
(define_insn "*mul<mode>3<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
- (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
- (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
+ (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>")
+ (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
&& <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
@@ -12618,10 +12626,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*avx2_<code><mode>3"
- [(set (match_operand:VI124_256 0 "register_operand" "=v")
+ [(set (match_operand:VI124_256 0 "register_operand" "=<v_Yw>")
(maxmin:VI124_256
- (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
- (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
+ (match_operand:VI124_256 1 "nonimmediate_operand" "%<v_Yw>")
+ (match_operand:VI124_256 2 "nonimmediate_operand" "<v_Yw>m")))]
"TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseiadd")
@@ -12745,10 +12753,10 @@
})
(define_insn "*sse4_1_<code><mode>3<mask_name>"
- [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
+ [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,<v_Yw>")
(smaxmin:VI14_128
- (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
- (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
+ (match_operand:VI14_128 1 "vector_operand" "%0,0,<v_Yw>")
+ (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
"TARGET_SSE4_1
&& <mask_mode512bit_condition>
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
@@ -12830,10 +12838,10 @@
})
(define_insn "*sse4_1_<code><mode>3<mask_name>"
- [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
+ [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,<v_Yw>")
(umaxmin:VI24_128
- (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
- (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
+ (match_operand:VI24_128 1 "vector_operand" "%0,0,<v_Yw>")
+ (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
"TARGET_SSE4_1
&& <mask_mode512bit_condition>
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c
index adc3f36..cc12196 100644
--- a/gcc/config/i386/winnt.c
+++ b/gcc/config/i386/winnt.c
@@ -830,9 +830,20 @@ i386_pe_asm_lto_end (void)
struct seh_frame_state
{
- /* SEH records saves relative to the "current" stack pointer, whether
- or not there's a frame pointer in place. This tracks the current
- stack pointer offset from the CFA. */
+ /* SEH records offsets relative to the lowest address of the fixed stack
+ allocation. If there is no frame pointer, these offsets are from the
+ stack pointer; if there is a frame pointer, these offsets are from the
+ value of the stack pointer when the frame pointer was established, i.e.
+ the frame pointer minus the offset in the .seh_setframe directive.
+
+ We do not distinguish these two cases, i.e. we consider that the offsets
+ are always relative to the "current" stack pointer. This means that we
+ need to perform the fixed stack allocation before establishing the frame
+ pointer whenever there are registers to be saved, and this is guaranteed
+ by the prologue provided that we force the frame pointer to point at or
+ below the lowest used register save area, see ix86_compute_frame_layout.
+
+ This tracks the current stack pointer offset from the CFA. */
HOST_WIDE_INT sp_offset;
/* The CFA is located at CFA_REG + CFA_OFFSET. */
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index ebb04b7..3155459 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -2381,7 +2381,7 @@ mips_symbol_insns (enum mips_symbol_type type, machine_mode mode)
{
/* MSA LD.* and ST.* cannot support loading symbols via an immediate
operand. */
- if (MSA_SUPPORTED_MODE_P (mode))
+ if (mode != MAX_MACHINE_MODE && MSA_SUPPORTED_MODE_P (mode))
return 0;
return mips_symbol_insns_1 (type, mode) * (TARGET_MIPS16 ? 2 : 1);
@@ -8400,7 +8400,7 @@ mips_expand_ext_as_unaligned_load (rtx dest, rtx src, HOST_WIDE_INT width,
/* If TARGET_64BIT, the destination of a 32-bit "extz" or "extzv" will
be a DImode, create a new temp and emit a zero extend at the end. */
if (GET_MODE (dest) == DImode
- && REG_P (dest)
+ && (REG_P (dest) || (SUBREG_P (dest) && !MEM_P (SUBREG_REG (dest))))
&& GET_MODE_BITSIZE (SImode) == width)
{
dest1 = dest;
diff --git a/gcc/config/mn10300/mn10300.c b/gcc/config/mn10300/mn10300.c
index bdacade..c1c2e6e 100644
--- a/gcc/config/mn10300/mn10300.c
+++ b/gcc/config/mn10300/mn10300.c
@@ -2847,9 +2847,10 @@ mn10300_conditional_register_usage (void)
with the old cc0-based compiler. */
static rtx_insn *
-mn10300_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
- vec<const char *> &/*constraints*/,
- vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
+mn10300_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
+ vec<machine_mode> & /*input_modes*/,
+ vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
+ HARD_REG_SET &clobbered_regs)
{
clobbers.safe_push (gen_rtx_REG (CCmode, CC_REG));
SET_HARD_REG_BIT (clobbered_regs, CC_REG);
diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c
index 226da0b..7217d78 100644
--- a/gcc/config/nds32/nds32.c
+++ b/gcc/config/nds32/nds32.c
@@ -4197,6 +4197,7 @@ nds32_option_override (void)
static rtx_insn *
nds32_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED,
vec<rtx> &inputs ATTRIBUTE_UNUSED,
+ vec<machine_mode> &input_modes ATTRIBUTE_UNUSED,
vec<const char *> &constraints ATTRIBUTE_UNUSED,
vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
{
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
index 3921b5c..d7fcd11 100644
--- a/gcc/config/pa/pa.c
+++ b/gcc/config/pa/pa.c
@@ -293,7 +293,7 @@ static size_t n_deferred_plabels = 0;
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
-#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end
@@ -8461,12 +8461,15 @@ pa_is_function_label_plus_const (rtx op)
&& GET_CODE (XEXP (op, 1)) == CONST_INT);
}
-/* Output assembly code for a thunk to FUNCTION. */
+/* Output the assembler code for a thunk function. THUNK_DECL is the
+ declaration for the thunk function itself, FUNCTION is the decl for
+ the target function. DELTA is an immediate constant offset to be
+ added to THIS. If VCALL_OFFSET is nonzero, the word at
+ *(*this + vcall_offset) should be added to THIS. */
static void
pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
- HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
- tree function)
+ HOST_WIDE_INT vcall_offset, tree function)
{
const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
static unsigned int current_thunk_number;
@@ -8482,201 +8485,386 @@ pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
assemble_start_function (thunk_fndecl, fnname);
final_start_function (emit_barrier (), file, 1);
- /* Output the thunk. We know that the function is in the same
- translation unit (i.e., the same space) as the thunk, and that
- thunks are output after their method. Thus, we don't need an
- external branch to reach the function. With SOM and GAS,
- functions and thunks are effectively in different sections.
- Thus, we can always use a IA-relative branch and the linker
- will add a long branch stub if necessary.
-
- However, we have to be careful when generating PIC code on the
- SOM port to ensure that the sequence does not transfer to an
- import stub for the target function as this could clobber the
- return value saved at SP-24. This would also apply to the
- 32-bit linux port if the multi-space model is implemented. */
- if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
- && !(flag_pic && TREE_PUBLIC (function))
- && (TARGET_GAS || last_address < 262132))
- || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
- && ((targetm_common.have_named_sections
- && DECL_SECTION_NAME (thunk_fndecl) != NULL
- /* The GNU 64-bit linker has rather poor stub management.
- So, we use a long branch from thunks that aren't in
- the same section as the target function. */
- && ((!TARGET_64BIT
- && (DECL_SECTION_NAME (thunk_fndecl)
- != DECL_SECTION_NAME (function)))
- || ((DECL_SECTION_NAME (thunk_fndecl)
- == DECL_SECTION_NAME (function))
- && last_address < 262132)))
- /* In this case, we need to be able to reach the start of
- the stub table even though the function is likely closer
- and can be jumped to directly. */
- || (targetm_common.have_named_sections
- && DECL_SECTION_NAME (thunk_fndecl) == NULL
- && DECL_SECTION_NAME (function) == NULL
- && total_code_bytes < MAX_PCREL17F_OFFSET)
- /* Likewise. */
- || (!targetm_common.have_named_sections
- && total_code_bytes < MAX_PCREL17F_OFFSET))))
- {
- if (!val_14)
- output_asm_insn ("addil L'%2,%%r26", xoperands);
-
- output_asm_insn ("b %0", xoperands);
-
- if (val_14)
- {
- output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
- nbytes += 8;
+ if (!vcall_offset)
+ {
+ /* Output the thunk. We know that the function is in the same
+ translation unit (i.e., the same space) as the thunk, and that
+ thunks are output after their method. Thus, we don't need an
+ external branch to reach the function. With SOM and GAS,
+ functions and thunks are effectively in different sections.
+ Thus, we can always use a IA-relative branch and the linker
+ will add a long branch stub if necessary.
+
+ However, we have to be careful when generating PIC code on the
+ SOM port to ensure that the sequence does not transfer to an
+ import stub for the target function as this could clobber the
+ return value saved at SP-24. This would also apply to the
+ 32-bit linux port if the multi-space model is implemented. */
+ if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
+ && !(flag_pic && TREE_PUBLIC (function))
+ && (TARGET_GAS || last_address < 262132))
+ || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
+ && ((targetm_common.have_named_sections
+ && DECL_SECTION_NAME (thunk_fndecl) != NULL
+ /* The GNU 64-bit linker has rather poor stub management.
+ So, we use a long branch from thunks that aren't in
+ the same section as the target function. */
+ && ((!TARGET_64BIT
+ && (DECL_SECTION_NAME (thunk_fndecl)
+ != DECL_SECTION_NAME (function)))
+ || ((DECL_SECTION_NAME (thunk_fndecl)
+ == DECL_SECTION_NAME (function))
+ && last_address < 262132)))
+ /* In this case, we need to be able to reach the start of
+ the stub table even though the function is likely closer
+ and can be jumped to directly. */
+ || (targetm_common.have_named_sections
+ && DECL_SECTION_NAME (thunk_fndecl) == NULL
+ && DECL_SECTION_NAME (function) == NULL
+ && total_code_bytes < MAX_PCREL17F_OFFSET)
+ /* Likewise. */
+ || (!targetm_common.have_named_sections
+ && total_code_bytes < MAX_PCREL17F_OFFSET))))
+ {
+ if (!val_14)
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+
+ output_asm_insn ("b %0", xoperands);
+
+ if (val_14)
+ {
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 8;
+ }
+ else
+ {
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ nbytes += 12;
+ }
}
- else
+ else if (TARGET_64BIT)
{
- output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
- nbytes += 12;
- }
- }
- else if (TARGET_64BIT)
- {
- rtx xop[4];
+ rtx xop[4];
+
+ /* We only have one call-clobbered scratch register, so we can't
+ make use of the delay slot if delta doesn't fit in 14 bits. */
+ if (!val_14)
+ {
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ }
+
+ /* Load function address into %r1. */
+ xop[0] = xoperands[0];
+ xop[1] = gen_rtx_REG (Pmode, 1);
+ xop[2] = xop[1];
+ pa_output_pic_pcrel_sequence (xop);
- /* We only have one call-clobbered scratch register, so we can't
- make use of the delay slot if delta doesn't fit in 14 bits. */
- if (!val_14)
+ if (val_14)
+ {
+ output_asm_insn ("bv %%r0(%%r1)", xoperands);
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 20;
+ }
+ else
+ {
+ output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
+ nbytes += 24;
+ }
+ }
+ else if (TARGET_PORTABLE_RUNTIME)
{
- output_asm_insn ("addil L'%2,%%r26", xoperands);
- output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ output_asm_insn ("ldil L'%0,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
+
+ if (!val_14)
+ output_asm_insn ("ldil L'%2,%%r26", xoperands);
+
+ output_asm_insn ("bv %%r0(%%r22)", xoperands);
+
+ if (val_14)
+ {
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 16;
+ }
+ else
+ {
+ output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
+ nbytes += 20;
+ }
}
+ else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
+ {
+ /* The function is accessible from outside this module. The only
+ way to avoid an import stub between the thunk and function is to
+ call the function directly with an indirect sequence similar to
+ that used by $$dyncall. This is possible because $$dyncall acts
+ as the import stub in an indirect call. */
+ ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
+ xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
+ output_asm_insn ("addil LT'%3,%%r19", xoperands);
+ output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
+ output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
+ output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
+ output_asm_insn ("depi 0,31,2,%%r22", xoperands);
+ output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
+ output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
+
+ if (!val_14)
+ {
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+ nbytes += 4;
+ }
- /* Load function address into %r1. */
- xop[0] = xoperands[0];
- xop[1] = gen_rtx_REG (Pmode, 1);
- xop[2] = xop[1];
- pa_output_pic_pcrel_sequence (xop);
+ if (TARGET_PA_20)
+ {
+ output_asm_insn ("bve (%%r22)", xoperands);
+ nbytes += 36;
+ }
+ else if (TARGET_NO_SPACE_REGS)
+ {
+ output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
+ nbytes += 36;
+ }
+ else
+ {
+ output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
+ output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
+ output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
+ nbytes += 44;
+ }
- if (val_14)
- {
- output_asm_insn ("bv %%r0(%%r1)", xoperands);
- output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
- nbytes += 20;
+ if (val_14)
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ else
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
}
- else
+ else if (flag_pic)
{
- output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
- nbytes += 24;
- }
- }
- else if (TARGET_PORTABLE_RUNTIME)
- {
- output_asm_insn ("ldil L'%0,%%r1", xoperands);
- output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
+ rtx xop[4];
- if (!val_14)
- output_asm_insn ("ldil L'%2,%%r26", xoperands);
+ /* Load function address into %r22. */
+ xop[0] = xoperands[0];
+ xop[1] = gen_rtx_REG (Pmode, 1);
+ xop[2] = gen_rtx_REG (Pmode, 22);
+ pa_output_pic_pcrel_sequence (xop);
- output_asm_insn ("bv %%r0(%%r22)", xoperands);
+ if (!val_14)
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
- if (val_14)
- {
- output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
- nbytes += 16;
+ output_asm_insn ("bv %%r0(%%r22)", xoperands);
+
+ if (val_14)
+ {
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 20;
+ }
+ else
+ {
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ nbytes += 24;
+ }
}
else
{
- output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
- nbytes += 20;
+ if (!val_14)
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+
+ output_asm_insn ("ldil L'%0,%%r22", xoperands);
+ output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
+
+ if (val_14)
+ {
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 12;
+ }
+ else
+ {
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ nbytes += 16;
+ }
}
}
- else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
+ else
{
- /* The function is accessible from outside this module. The only
- way to avoid an import stub between the thunk and function is to
- call the function directly with an indirect sequence similar to
- that used by $$dyncall. This is possible because $$dyncall acts
- as the import stub in an indirect call. */
- ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
- xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
- output_asm_insn ("addil LT'%3,%%r19", xoperands);
- output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
- output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
- output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
- output_asm_insn ("depi 0,31,2,%%r22", xoperands);
- output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
- output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
+ rtx xop[4];
- if (!val_14)
+ /* Add DELTA to THIS. */
+ if (val_14)
{
- output_asm_insn ("addil L'%2,%%r26", xoperands);
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
nbytes += 4;
}
-
- if (TARGET_PA_20)
+ else
{
- output_asm_insn ("bve (%%r22)", xoperands);
- nbytes += 36;
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ nbytes += 8;
}
- else if (TARGET_NO_SPACE_REGS)
+
+ if (TARGET_64BIT)
{
- output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
- nbytes += 36;
+ /* Load *(THIS + DELTA) to %r1. */
+ output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);
+
+ val_14 = VAL_14_BITS_P (vcall_offset);
+ xoperands[2] = GEN_INT (vcall_offset);
+
+ /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
+ if (val_14)
+ {
+ output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
+ nbytes += 8;
+ }
+ else
+ {
+ output_asm_insn ("addil L'%2,%%r1", xoperands);
+ output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
+ nbytes += 12;
+ }
}
else
{
- output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
- output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
- output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
- nbytes += 44;
- }
+ /* Load *(THIS + DELTA) to %r1. */
+ output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);
- if (val_14)
- output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
- else
- output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
- }
- else if (flag_pic)
- {
- rtx xop[4];
-
- /* Load function address into %r22. */
- xop[0] = xoperands[0];
- xop[1] = gen_rtx_REG (Pmode, 1);
- xop[2] = gen_rtx_REG (Pmode, 22);
- pa_output_pic_pcrel_sequence (xop);
+ val_14 = VAL_14_BITS_P (vcall_offset);
+ xoperands[2] = GEN_INT (vcall_offset);
- if (!val_14)
- output_asm_insn ("addil L'%2,%%r26", xoperands);
-
- output_asm_insn ("bv %%r0(%%r22)", xoperands);
+ /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
+ if (val_14)
+ {
+ output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
+ nbytes += 8;
+ }
+ else
+ {
+ output_asm_insn ("addil L'%2,%%r1", xoperands);
+ output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
+ nbytes += 12;
+ }
+ }
- if (val_14)
+ /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible. */
+ if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
+ && !(flag_pic && TREE_PUBLIC (function))
+ && (TARGET_GAS || last_address < 262132))
+ || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
+ && ((targetm_common.have_named_sections
+ && DECL_SECTION_NAME (thunk_fndecl) != NULL
+ /* The GNU 64-bit linker has rather poor stub management.
+ So, we use a long branch from thunks that aren't in
+ the same section as the target function. */
+ && ((!TARGET_64BIT
+ && (DECL_SECTION_NAME (thunk_fndecl)
+ != DECL_SECTION_NAME (function)))
+ || ((DECL_SECTION_NAME (thunk_fndecl)
+ == DECL_SECTION_NAME (function))
+ && last_address < 262132)))
+ /* In this case, we need to be able to reach the start of
+ the stub table even though the function is likely closer
+ and can be jumped to directly. */
+ || (targetm_common.have_named_sections
+ && DECL_SECTION_NAME (thunk_fndecl) == NULL
+ && DECL_SECTION_NAME (function) == NULL
+ && total_code_bytes < MAX_PCREL17F_OFFSET)
+ /* Likewise. */
+ || (!targetm_common.have_named_sections
+ && total_code_bytes < MAX_PCREL17F_OFFSET))))
{
- output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
- nbytes += 20;
+ nbytes += 4;
+ output_asm_insn ("b %0", xoperands);
+
+ /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
+ output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
}
- else
+ else if (TARGET_64BIT)
{
- output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
- nbytes += 24;
- }
- }
- else
- {
- if (!val_14)
- output_asm_insn ("addil L'%2,%%r26", xoperands);
+ /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
+ output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
- output_asm_insn ("ldil L'%0,%%r22", xoperands);
- output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
+ /* Load function address into %r1. */
+ nbytes += 16;
+ xop[0] = xoperands[0];
+ xop[1] = gen_rtx_REG (Pmode, 1);
+ xop[2] = xop[1];
+ pa_output_pic_pcrel_sequence (xop);
- if (val_14)
+ output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
+ }
+ else if (TARGET_PORTABLE_RUNTIME)
{
- output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ /* Load function address into %r22. */
nbytes += 12;
+ output_asm_insn ("ldil L'%0,%%r22", xoperands);
+ output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);
+
+ output_asm_insn ("bv %%r0(%%r22)", xoperands);
+
+ /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
+ output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
+ }
+ else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
+ {
+ /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
+ output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
+
+ /* The function is accessible from outside this module. The only
+ way to avoid an import stub between the thunk and function is to
+ call the function directly with an indirect sequence similar to
+ that used by $$dyncall. This is possible because $$dyncall acts
+ as the import stub in an indirect call. */
+ ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
+ xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
+ output_asm_insn ("addil LT'%3,%%r19", xoperands);
+ output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
+ output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
+ output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
+ output_asm_insn ("depi 0,31,2,%%r22", xoperands);
+ output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
+ output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
+
+ if (TARGET_PA_20)
+ {
+ output_asm_insn ("bve,n (%%r22)", xoperands);
+ nbytes += 32;
+ }
+ else if (TARGET_NO_SPACE_REGS)
+ {
+ output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
+ nbytes += 32;
+ }
+ else
+ {
+ output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
+ output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
+ output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
+ nbytes += 40;
+ }
}
- else
+ else if (flag_pic)
{
- output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
+ output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
+
+ /* Load function address into %r1. */
nbytes += 16;
+ xop[0] = xoperands[0];
+ xop[1] = gen_rtx_REG (Pmode, 1);
+ xop[2] = xop[1];
+ pa_output_pic_pcrel_sequence (xop);
+
+ output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
+ }
+ else
+ {
+ /* Load function address into %r22. */
+ nbytes += 8;
+ output_asm_insn ("ldil L'%0,%%r22", xoperands);
+ output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
+
+ /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
+ output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
}
}
diff --git a/gcc/config/pdp11/pdp11.c b/gcc/config/pdp11/pdp11.c
index bd6e0dc..eb3bea4 100644
--- a/gcc/config/pdp11/pdp11.c
+++ b/gcc/config/pdp11/pdp11.c
@@ -155,7 +155,7 @@ static bool pdp11_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int pdp11_addr_cost (rtx, machine_mode, addr_space_t, bool);
static int pdp11_insn_cost (rtx_insn *insn, bool speed);
static rtx_insn *pdp11_md_asm_adjust (vec<rtx> &, vec<rtx> &,
- vec<const char *> &,
+ vec<machine_mode> &, vec<const char *> &,
vec<rtx> &, HARD_REG_SET &);
static bool pdp11_return_in_memory (const_tree, const_tree);
static rtx pdp11_function_value (const_tree, const_tree, bool);
@@ -2139,9 +2139,10 @@ pdp11_cmp_length (rtx *operands, int words)
compiler. */
static rtx_insn *
-pdp11_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
- vec<const char *> &/*constraints*/,
- vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
+pdp11_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
+ vec<machine_mode> & /*input_modes*/,
+ vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
+ HARD_REG_SET &clobbered_regs)
{
clobbers.safe_push (gen_rtx_REG (CCmode, CC_REGNUM));
SET_HARD_REG_BIT (clobbered_regs, CC_REGNUM);
diff --git a/gcc/config/riscv/riscv-shorten-memrefs.c b/gcc/config/riscv/riscv-shorten-memrefs.c
index b1b57f1..3f34065 100644
--- a/gcc/config/riscv/riscv-shorten-memrefs.c
+++ b/gcc/config/riscv/riscv-shorten-memrefs.c
@@ -75,12 +75,19 @@ private:
regno_map * analyze (basic_block bb);
void transform (regno_map *m, basic_block bb);
- bool get_si_mem_base_reg (rtx mem, rtx *addr);
+ bool get_si_mem_base_reg (rtx mem, rtx *addr, bool *extend);
}; // class pass_shorten_memrefs
bool
-pass_shorten_memrefs::get_si_mem_base_reg (rtx mem, rtx *addr)
+pass_shorten_memrefs::get_si_mem_base_reg (rtx mem, rtx *addr, bool *extend)
{
+ /* Whether it's sign/zero extended. */
+ if (GET_CODE (mem) == ZERO_EXTEND || GET_CODE (mem) == SIGN_EXTEND)
+ {
+ *extend = true;
+ mem = XEXP (mem, 0);
+ }
+
if (!MEM_P (mem) || GET_MODE (mem) != SImode)
return false;
*addr = XEXP (mem, 0);
@@ -110,7 +117,8 @@ pass_shorten_memrefs::analyze (basic_block bb)
{
rtx mem = XEXP (pat, i);
rtx addr;
- if (get_si_mem_base_reg (mem, &addr))
+ bool extend = false;
+ if (get_si_mem_base_reg (mem, &addr, &extend))
{
HOST_WIDE_INT regno = REGNO (XEXP (addr, 0));
/* Do not count store zero as these cannot be compressed. */
@@ -150,7 +158,8 @@ pass_shorten_memrefs::transform (regno_map *m, basic_block bb)
{
rtx mem = XEXP (pat, i);
rtx addr;
- if (get_si_mem_base_reg (mem, &addr))
+ bool extend = false;
+ if (get_si_mem_base_reg (mem, &addr, &extend))
{
HOST_WIDE_INT regno = REGNO (XEXP (addr, 0));
/* Do not transform store zero as these cannot be compressed. */
@@ -161,9 +170,20 @@ pass_shorten_memrefs::transform (regno_map *m, basic_block bb)
}
if (m->get_or_insert (regno) > 3)
{
- addr
- = targetm.legitimize_address (addr, addr, GET_MODE (mem));
- XEXP (pat, i) = replace_equiv_address (mem, addr);
+ if (extend)
+ {
+ addr
+ = targetm.legitimize_address (addr, addr,
+ GET_MODE (XEXP (mem, 0)));
+ XEXP (XEXP (pat, i), 0)
+ = replace_equiv_address (XEXP (mem, 0), addr);
+ }
+ else
+ {
+ addr = targetm.legitimize_address (addr, addr,
+ GET_MODE (mem));
+ XEXP (pat, i) = replace_equiv_address (mem, addr);
+ }
df_insn_rescan (insn);
}
}
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index ff41795..fffd081 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -891,17 +891,13 @@ riscv_compressed_lw_address_p (rtx x)
bool result = riscv_classify_address (&addr, x, GET_MODE (x),
reload_completed);
- /* Before reload, assuming all load/stores of valid addresses get compressed
- gives better code size than checking if the address is reg + small_offset
- early on. */
- if (result && !reload_completed)
- return true;
-
/* Return false if address is not compressed_reg + small_offset. */
if (!result
|| addr.type != ADDRESS_REG
- || (!riscv_compressed_reg_p (REGNO (addr.reg))
- && addr.reg != stack_pointer_rtx)
+ /* Before reload, assume all registers are OK. */
+ || (reload_completed
+ && !riscv_compressed_reg_p (REGNO (addr.reg))
+ && addr.reg != stack_pointer_rtx)
|| !riscv_compressed_lw_offset_p (addr.offset))
return false;
@@ -1528,6 +1524,28 @@ riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
bool
riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
{
+ /* Expand
+ (set (reg:QI target) (mem:QI (address)))
+ to
+ (set (reg:DI temp) (zero_extend:DI (mem:QI (address))))
+ (set (reg:QI target) (subreg:QI (reg:DI temp) 0))
+ with auto-sign/zero extend. */
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < UNITS_PER_WORD
+ && can_create_pseudo_p ()
+ && MEM_P (src))
+ {
+ rtx temp_reg;
+ int zero_extend_p;
+
+ temp_reg = gen_reg_rtx (word_mode);
+ zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND);
+ emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode,
+ zero_extend_p));
+ riscv_emit_move (dest, gen_lowpart (mode, temp_reg));
+ return true;
+ }
+
if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
{
rtx reg;
@@ -1708,6 +1726,13 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
instructions it needs. */
if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0)
{
+ /* When optimizing for size, make uncompressible 32-bit addresses
+ more expensive so that compressible 32-bit addresses are
+ preferred. */
+ if (TARGET_RVC && !speed && riscv_mshorten_memrefs && mode == SImode
+ && !riscv_compressed_lw_address_p (XEXP (x, 0)))
+ cost++;
+
*total = COSTS_N_INSNS (cost + tune_param->memory_cost);
return true;
}
diff --git a/gcc/config/rs6000/aix.h b/gcc/config/rs6000/aix.h
index 5e8743a..2db50c8 100644
--- a/gcc/config/rs6000/aix.h
+++ b/gcc/config/rs6000/aix.h
@@ -224,7 +224,8 @@
/* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
#define ADJUST_FIELD_ALIGN(FIELD, TYPE, COMPUTED) \
((TARGET_ALIGN_NATURAL == 0 \
- && TYPE_MODE (strip_array_types (TYPE)) == DFmode) \
+ && (TYPE_MODE (strip_array_types (TYPE)) == DFmode \
+ || TYPE_MODE (strip_array_types (TYPE)) == DCmode)) \
? MIN ((COMPUTED), 32) \
: (COMPUTED))
diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md
index c8cdb64..026be5d 100644
--- a/gcc/config/rs6000/dfp.md
+++ b/gcc/config/rs6000/dfp.md
@@ -139,7 +139,8 @@
(float_extend:TD (match_operand:DD 1 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dctqpq %0,%1"
- [(set_attr "type" "dfp")])
+ [(set_attr "type" "dfp")
+ (set_attr "size" "128")])
;; The result of drdpq is an even/odd register pair with the converted
;; value in the even register and zero in the odd register.
@@ -153,6 +154,7 @@
"TARGET_DFP"
"drdpq %2,%1\;fmr %0,%2"
[(set_attr "type" "dfp")
+ (set_attr "size" "128")
(set_attr "length" "8")])
(define_insn "trunctdsd2"
@@ -206,7 +208,8 @@
(match_operand:DDTD 2 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dcmpu<q> %0,%1,%2"
- [(set_attr "type" "dfp")])
+ [(set_attr "type" "dfp")
+ (set_attr "size" "<bits>")])
(define_insn "floatdidd2"
[(set (match_operand:DD 0 "gpc_reg_operand" "=d")
@@ -220,7 +223,8 @@
(float:TD (match_operand:DI 1 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dcffixq %0,%1"
- [(set_attr "type" "dfp")])
+ [(set_attr "type" "dfp")
+ (set_attr "size" "128")])
;; Convert a decimal64/128 to a decimal64/128 whose value is an integer.
;; This is the first stage of converting it to an integer type.
@@ -230,7 +234,8 @@
(fix:DDTD (match_operand:DDTD 1 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"drintn<q>. 0,%0,%1,1"
- [(set_attr "type" "dfp")])
+ [(set_attr "type" "dfp")
+ (set_attr "size" "<bits>")])
;; Convert a decimal64/128 whose value is an integer to an actual integer.
;; This is the second stage of converting decimal float to integer type.
@@ -240,7 +245,8 @@
(fix:DI (match_operand:DDTD 1 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dctfix<q> %0,%1"
- [(set_attr "type" "dfp")])
+ [(set_attr "type" "dfp")
+ (set_attr "size" "<bits>")])
;; Decimal builtin support
@@ -262,7 +268,8 @@
UNSPEC_DDEDPD))]
"TARGET_DFP"
"ddedpd<q> %1,%0,%2"
- [(set_attr "type" "dfp")])
+ [(set_attr "type" "dfp")
+ (set_attr "size" "<bits>")])
(define_insn "dfp_denbcd_<mode>"
[(set (match_operand:DDTD 0 "gpc_reg_operand" "=d")
@@ -271,7 +278,8 @@
UNSPEC_DENBCD))]
"TARGET_DFP"
"denbcd<q> %1,%0,%2"
- [(set_attr "type" "dfp")])
+ [(set_attr "type" "dfp")
+ (set_attr "size" "<bits>")])
(define_insn "dfp_denbcd_v16qi_inst"
[(set (match_operand:TD 0 "gpc_reg_operand" "=d")
@@ -301,7 +309,8 @@
UNSPEC_DXEX))]
"TARGET_DFP"
"dxex<q> %0,%1"
- [(set_attr "type" "dfp")])
+ [(set_attr "type" "dfp")
+ (set_attr "size" "<bits>")])
(define_insn "dfp_diex_<mode>"
[(set (match_operand:DDTD 0 "gpc_reg_operand" "=d")
@@ -310,7 +319,8 @@
UNSPEC_DXEX))]
"TARGET_DFP"
"diex<q> %0,%1,%2"
- [(set_attr "type" "dfp")])
+ [(set_attr "type" "dfp")
+ (set_attr "size" "<bits>")])
(define_expand "dfptstsfi_<code>_<mode>"
[(set (match_dup 3)
@@ -349,7 +359,8 @@
operands[1] = GEN_INT (63);
return "dtstsfi<q> %0,%1,%2";
}
- [(set_attr "type" "fp")])
+ [(set_attr "type" "fp")
+ (set_attr "size" "<bits>")])
(define_insn "dfp_dscli_<mode>"
[(set (match_operand:DDTD 0 "gpc_reg_operand" "=d")
@@ -358,7 +369,8 @@
UNSPEC_DSCLI))]
"TARGET_DFP"
"dscli<q> %0,%1,%2"
- [(set_attr "type" "dfp")])
+ [(set_attr "type" "dfp")
+ (set_attr "size" "<bits>")])
(define_insn "dfp_dscri_<mode>"
[(set (match_operand:DDTD 0 "gpc_reg_operand" "=d")
@@ -367,4 +379,5 @@
UNSPEC_DSCRI))]
"TARGET_DFP"
"dscri<q> %0,%1,%2"
- [(set_attr "type" "dfp")])
+ [(set_attr "type" "dfp")
+ (set_attr "size" "<bits>")])
diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 737a6da..56478fc 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1,7 +1,6 @@
-;; -*- buffer-read-only: t -*-
;; Generated automatically by genfusion.pl
-;; Copyright (C) 2020 Free Software Foundation, Inc.
+;; Copyright (C) 2020,2021 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
@@ -23,18 +22,18 @@
;; load mode is DI result mode is clobber compare mode is CC extend is none
(define_insn_and_split "*ld_cmpdi_cr0_DI_clobber_CC_none"
[(set (match_operand:CC 2 "cc_reg_operand" "=x")
- (compare:CC (match_operand:DI 1 "non_update_memory_operand" "m")
- (match_operand:DI 3 "const_m1_to_1_operand" "n")))
+ (compare:CC (match_operand:DI 1 "ds_form_mem_operand" "m")
+ (match_operand:DI 3 "const_m1_to_1_operand" "n")))
(clobber (match_scratch:DI 0 "=r"))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "ld%X1 %0,%1\;cmpdi 0,%0,%3"
+ "ld%X1 %0,%1\;cmpdi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ DImode, NON_PREFIXED_DS))"
[(set (match_dup 0) (match_dup 1))
(set (match_dup 2)
- (compare:CC (match_dup 0)
- (match_dup 3)))]
+ (compare:CC (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -44,18 +43,18 @@
;; load mode is DI result mode is clobber compare mode is CCUNS extend is none
(define_insn_and_split "*ld_cmpldi_cr0_DI_clobber_CCUNS_none"
[(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
- (compare:CCUNS (match_operand:DI 1 "non_update_memory_operand" "m")
- (match_operand:DI 3 "const_0_to_1_operand" "n")))
+ (compare:CCUNS (match_operand:DI 1 "ds_form_mem_operand" "m")
+ (match_operand:DI 3 "const_0_to_1_operand" "n")))
(clobber (match_scratch:DI 0 "=r"))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "ld%X1 %0,%1\;cmpldi 0,%0,%3"
+ "ld%X1 %0,%1\;cmpldi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ DImode, NON_PREFIXED_DS))"
[(set (match_dup 0) (match_dup 1))
(set (match_dup 2)
- (compare:CCUNS (match_dup 0)
- (match_dup 3)))]
+ (compare:CCUNS (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -65,18 +64,18 @@
;; load mode is DI result mode is DI compare mode is CC extend is none
(define_insn_and_split "*ld_cmpdi_cr0_DI_DI_CC_none"
[(set (match_operand:CC 2 "cc_reg_operand" "=x")
- (compare:CC (match_operand:DI 1 "non_update_memory_operand" "m")
- (match_operand:DI 3 "const_m1_to_1_operand" "n")))
+ (compare:CC (match_operand:DI 1 "ds_form_mem_operand" "m")
+ (match_operand:DI 3 "const_m1_to_1_operand" "n")))
(set (match_operand:DI 0 "gpc_reg_operand" "=r") (match_dup 1))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "ld%X1 %0,%1\;cmpdi 0,%0,%3"
+ "ld%X1 %0,%1\;cmpdi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ DImode, NON_PREFIXED_DS))"
[(set (match_dup 0) (match_dup 1))
(set (match_dup 2)
- (compare:CC (match_dup 0)
- (match_dup 3)))]
+ (compare:CC (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -86,18 +85,18 @@
;; load mode is DI result mode is DI compare mode is CCUNS extend is none
(define_insn_and_split "*ld_cmpldi_cr0_DI_DI_CCUNS_none"
[(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
- (compare:CCUNS (match_operand:DI 1 "non_update_memory_operand" "m")
- (match_operand:DI 3 "const_0_to_1_operand" "n")))
+ (compare:CCUNS (match_operand:DI 1 "ds_form_mem_operand" "m")
+ (match_operand:DI 3 "const_0_to_1_operand" "n")))
(set (match_operand:DI 0 "gpc_reg_operand" "=r") (match_dup 1))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "ld%X1 %0,%1\;cmpldi 0,%0,%3"
+ "ld%X1 %0,%1\;cmpldi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ DImode, NON_PREFIXED_DS))"
[(set (match_dup 0) (match_dup 1))
(set (match_dup 2)
- (compare:CCUNS (match_dup 0)
- (match_dup 3)))]
+ (compare:CCUNS (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -107,18 +106,18 @@
;; load mode is SI result mode is clobber compare mode is CC extend is none
(define_insn_and_split "*lwa_cmpdi_cr0_SI_clobber_CC_none"
[(set (match_operand:CC 2 "cc_reg_operand" "=x")
- (compare:CC (match_operand:SI 1 "non_update_memory_operand" "m")
- (match_operand:SI 3 "const_m1_to_1_operand" "n")))
+ (compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m")
+ (match_operand:SI 3 "const_m1_to_1_operand" "n")))
(clobber (match_scratch:SI 0 "=r"))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "lwa%X1 %0,%1\;cmpdi 0,%0,%3"
+ "lwa%X1 %0,%1\;cmpdi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), SImode, NON_PREFIXED_DS))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ SImode, NON_PREFIXED_DS))"
[(set (match_dup 0) (match_dup 1))
(set (match_dup 2)
- (compare:CC (match_dup 0)
- (match_dup 3)))]
+ (compare:CC (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -129,17 +128,17 @@
(define_insn_and_split "*lwz_cmpldi_cr0_SI_clobber_CCUNS_none"
[(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
(compare:CCUNS (match_operand:SI 1 "non_update_memory_operand" "m")
- (match_operand:SI 3 "const_0_to_1_operand" "n")))
+ (match_operand:SI 3 "const_0_to_1_operand" "n")))
(clobber (match_scratch:SI 0 "=r"))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "lwz%X1 %0,%1\;cmpldi 0,%0,%3"
+ "lwz%X1 %0,%1\;cmpldi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), SImode, NON_PREFIXED_D))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ SImode, NON_PREFIXED_D))"
[(set (match_dup 0) (match_dup 1))
(set (match_dup 2)
- (compare:CCUNS (match_dup 0)
- (match_dup 3)))]
+ (compare:CCUNS (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -149,18 +148,18 @@
;; load mode is SI result mode is SI compare mode is CC extend is none
(define_insn_and_split "*lwa_cmpdi_cr0_SI_SI_CC_none"
[(set (match_operand:CC 2 "cc_reg_operand" "=x")
- (compare:CC (match_operand:SI 1 "non_update_memory_operand" "m")
- (match_operand:SI 3 "const_m1_to_1_operand" "n")))
+ (compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m")
+ (match_operand:SI 3 "const_m1_to_1_operand" "n")))
(set (match_operand:SI 0 "gpc_reg_operand" "=r") (match_dup 1))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "lwa%X1 %0,%1\;cmpdi 0,%0,%3"
+ "lwa%X1 %0,%1\;cmpdi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), SImode, NON_PREFIXED_DS))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ SImode, NON_PREFIXED_DS))"
[(set (match_dup 0) (match_dup 1))
(set (match_dup 2)
- (compare:CC (match_dup 0)
- (match_dup 3)))]
+ (compare:CC (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -171,17 +170,17 @@
(define_insn_and_split "*lwz_cmpldi_cr0_SI_SI_CCUNS_none"
[(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
(compare:CCUNS (match_operand:SI 1 "non_update_memory_operand" "m")
- (match_operand:SI 3 "const_0_to_1_operand" "n")))
+ (match_operand:SI 3 "const_0_to_1_operand" "n")))
(set (match_operand:SI 0 "gpc_reg_operand" "=r") (match_dup 1))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "lwz%X1 %0,%1\;cmpldi 0,%0,%3"
+ "lwz%X1 %0,%1\;cmpldi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), SImode, NON_PREFIXED_D))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ SImode, NON_PREFIXED_D))"
[(set (match_dup 0) (match_dup 1))
(set (match_dup 2)
- (compare:CCUNS (match_dup 0)
- (match_dup 3)))]
+ (compare:CCUNS (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -191,18 +190,18 @@
;; load mode is SI result mode is EXTSI compare mode is CC extend is sign
(define_insn_and_split "*lwa_cmpdi_cr0_SI_EXTSI_CC_sign"
[(set (match_operand:CC 2 "cc_reg_operand" "=x")
- (compare:CC (match_operand:SI 1 "non_update_memory_operand" "m")
- (match_operand:SI 3 "const_m1_to_1_operand" "n")))
+ (compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m")
+ (match_operand:SI 3 "const_m1_to_1_operand" "n")))
(set (match_operand:EXTSI 0 "gpc_reg_operand" "=r") (sign_extend:EXTSI (match_dup 1)))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "lwa%X1 %0,%1\;cmpdi 0,%0,%3"
+ "lwa%X1 %0,%1\;cmpdi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), SImode, NON_PREFIXED_DS))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ SImode, NON_PREFIXED_DS))"
[(set (match_dup 0) (sign_extend:EXTSI (match_dup 1)))
(set (match_dup 2)
- (compare:CC (match_dup 0)
- (match_dup 3)))]
+ (compare:CC (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -213,17 +212,17 @@
(define_insn_and_split "*lwz_cmpldi_cr0_SI_EXTSI_CCUNS_zero"
[(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
(compare:CCUNS (match_operand:SI 1 "non_update_memory_operand" "m")
- (match_operand:SI 3 "const_0_to_1_operand" "n")))
+ (match_operand:SI 3 "const_0_to_1_operand" "n")))
(set (match_operand:EXTSI 0 "gpc_reg_operand" "=r") (zero_extend:EXTSI (match_dup 1)))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "lwz%X1 %0,%1\;cmpldi 0,%0,%3"
+ "lwz%X1 %0,%1\;cmpldi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), SImode, NON_PREFIXED_D))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ SImode, NON_PREFIXED_D))"
[(set (match_dup 0) (zero_extend:EXTSI (match_dup 1)))
(set (match_dup 2)
- (compare:CCUNS (match_dup 0)
- (match_dup 3)))]
+ (compare:CCUNS (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -234,17 +233,17 @@
(define_insn_and_split "*lha_cmpdi_cr0_HI_clobber_CC_sign"
[(set (match_operand:CC 2 "cc_reg_operand" "=x")
(compare:CC (match_operand:HI 1 "non_update_memory_operand" "m")
- (match_operand:HI 3 "const_m1_to_1_operand" "n")))
+ (match_operand:HI 3 "const_m1_to_1_operand" "n")))
(clobber (match_scratch:GPR 0 "=r"))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "lha%X1 %0,%1\;cmpdi 0,%0,%3"
+ "lha%X1 %0,%1\;cmpdi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), HImode, NON_PREFIXED_D))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ HImode, NON_PREFIXED_D))"
[(set (match_dup 0) (sign_extend:GPR (match_dup 1)))
(set (match_dup 2)
- (compare:CC (match_dup 0)
- (match_dup 3)))]
+ (compare:CC (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -255,17 +254,17 @@
(define_insn_and_split "*lhz_cmpldi_cr0_HI_clobber_CCUNS_zero"
[(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
(compare:CCUNS (match_operand:HI 1 "non_update_memory_operand" "m")
- (match_operand:HI 3 "const_0_to_1_operand" "n")))
+ (match_operand:HI 3 "const_0_to_1_operand" "n")))
(clobber (match_scratch:GPR 0 "=r"))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "lhz%X1 %0,%1\;cmpldi 0,%0,%3"
+ "lhz%X1 %0,%1\;cmpldi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), HImode, NON_PREFIXED_D))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ HImode, NON_PREFIXED_D))"
[(set (match_dup 0) (zero_extend:GPR (match_dup 1)))
(set (match_dup 2)
- (compare:CCUNS (match_dup 0)
- (match_dup 3)))]
+ (compare:CCUNS (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -276,17 +275,17 @@
(define_insn_and_split "*lha_cmpdi_cr0_HI_EXTHI_CC_sign"
[(set (match_operand:CC 2 "cc_reg_operand" "=x")
(compare:CC (match_operand:HI 1 "non_update_memory_operand" "m")
- (match_operand:HI 3 "const_m1_to_1_operand" "n")))
+ (match_operand:HI 3 "const_m1_to_1_operand" "n")))
(set (match_operand:EXTHI 0 "gpc_reg_operand" "=r") (sign_extend:EXTHI (match_dup 1)))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "lha%X1 %0,%1\;cmpdi 0,%0,%3"
+ "lha%X1 %0,%1\;cmpdi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), HImode, NON_PREFIXED_D))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ HImode, NON_PREFIXED_D))"
[(set (match_dup 0) (sign_extend:EXTHI (match_dup 1)))
(set (match_dup 2)
- (compare:CC (match_dup 0)
- (match_dup 3)))]
+ (compare:CC (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -297,17 +296,17 @@
(define_insn_and_split "*lhz_cmpldi_cr0_HI_EXTHI_CCUNS_zero"
[(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
(compare:CCUNS (match_operand:HI 1 "non_update_memory_operand" "m")
- (match_operand:HI 3 "const_0_to_1_operand" "n")))
+ (match_operand:HI 3 "const_0_to_1_operand" "n")))
(set (match_operand:EXTHI 0 "gpc_reg_operand" "=r") (zero_extend:EXTHI (match_dup 1)))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "lhz%X1 %0,%1\;cmpldi 0,%0,%3"
+ "lhz%X1 %0,%1\;cmpldi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), HImode, NON_PREFIXED_D))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ HImode, NON_PREFIXED_D))"
[(set (match_dup 0) (zero_extend:EXTHI (match_dup 1)))
(set (match_dup 2)
- (compare:CCUNS (match_dup 0)
- (match_dup 3)))]
+ (compare:CCUNS (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -318,17 +317,17 @@
(define_insn_and_split "*lbz_cmpldi_cr0_QI_clobber_CCUNS_zero"
[(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
(compare:CCUNS (match_operand:QI 1 "non_update_memory_operand" "m")
- (match_operand:QI 3 "const_0_to_1_operand" "n")))
+ (match_operand:QI 3 "const_0_to_1_operand" "n")))
(clobber (match_scratch:GPR 0 "=r"))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "lbz%X1 %0,%1\;cmpldi 0,%0,%3"
+ "lbz%X1 %0,%1\;cmpldi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), QImode, NON_PREFIXED_D))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ QImode, NON_PREFIXED_D))"
[(set (match_dup 0) (zero_extend:GPR (match_dup 1)))
(set (match_dup 2)
- (compare:CCUNS (match_dup 0)
- (match_dup 3)))]
+ (compare:CCUNS (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
@@ -339,17 +338,17 @@
(define_insn_and_split "*lbz_cmpldi_cr0_QI_GPR_CCUNS_zero"
[(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
(compare:CCUNS (match_operand:QI 1 "non_update_memory_operand" "m")
- (match_operand:QI 3 "const_0_to_1_operand" "n")))
+ (match_operand:QI 3 "const_0_to_1_operand" "n")))
(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (zero_extend:GPR (match_dup 1)))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
- "lbz%X1 %0,%1\;cmpldi 0,%0,%3"
+ "lbz%X1 %0,%1\;cmpldi %2,%0,%3"
"&& reload_completed
&& (cc_reg_not_cr0_operand (operands[2], CCmode)
- || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), QImode, NON_PREFIXED_D))"
+ || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
+ QImode, NON_PREFIXED_D))"
[(set (match_dup 0) (zero_extend:GPR (match_dup 1)))
(set (match_dup 2)
- (compare:CCUNS (match_dup 0)
- (match_dup 3)))]
+ (compare:CCUNS (match_dup 0) (match_dup 3)))]
""
[(set_attr "type" "load")
(set_attr "cost" "8")
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index e1c45f5..c86c743 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -56,7 +56,7 @@ sub mode_to_ldst_char
sub gen_ld_cmpi_p10
{
my ($lmode, $ldst, $clobbermode, $result, $cmpl, $echr, $constpred,
- $ccmode, $np, $extend, $resultmode);
+ $mempred, $ccmode, $np, $extend, $resultmode);
LMODE: foreach $lmode ('DI','SI','HI','QI') {
$ldst = mode_to_ldst_char($lmode);
$clobbermode = $lmode;
@@ -70,11 +70,13 @@ sub gen_ld_cmpi_p10
$result = "GPR" if $result eq "EXTQI";
CCMODE: foreach $ccmode ('CC','CCUNS') {
$np = "NON_PREFIXED_D";
+ $mempred = "non_update_memory_operand";
if ( $ccmode eq 'CC' ) {
next CCMODE if $lmode eq 'QI';
if ( $lmode eq 'DI' || $lmode eq 'SI' ) {
# ld and lwa are both DS-FORM.
$np = "NON_PREFIXED_DS";
+ $mempred = "ds_form_mem_operand";
}
$cmpl = "";
$echr = "a";
@@ -83,6 +85,7 @@ sub gen_ld_cmpi_p10
if ( $lmode eq 'DI' ) {
# ld is DS-form, but lwz is not.
$np = "NON_PREFIXED_DS";
+ $mempred = "ds_form_mem_operand";
}
$cmpl = "l";
$echr = "z";
@@ -105,7 +108,7 @@ sub gen_ld_cmpi_p10
print "(define_insn_and_split \"*l${ldst}${echr}_cmp${cmpl}di_cr0_${lmode}_${result}_${ccmode}_${extend}\"\n";
print " [(set (match_operand:${ccmode} 2 \"cc_reg_operand\" \"=x\")\n";
- print " (compare:${ccmode} (match_operand:${lmode} 1 \"non_update_memory_operand\" \"m\")\n";
+ print " (compare:${ccmode} (match_operand:${lmode} 1 \"${mempred}\" \"m\")\n";
if ($ccmode eq 'CCUNS') { print " "; }
print " (match_operand:${lmode} 3 \"${constpred}\" \"n\")))\n";
if ($result eq 'clobber') {
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 87569f1..a00d3a3 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -288,6 +288,7 @@
DONE;
}
[(set_attr "type" "vecload,vecstore,veclogical")
+ (set_attr "size" "256")
(set_attr "length" "*,*,8")])
@@ -321,7 +322,7 @@
(set_attr "length" "*,*,16")
(set_attr "max_prefixed_insns" "2,2,*")])
-(define_expand "mma_assemble_pair"
+(define_expand "vsx_assemble_pair"
[(match_operand:OO 0 "vsx_register_operand")
(match_operand:V16QI 1 "mma_assemble_input_operand")
(match_operand:V16QI 2 "mma_assemble_input_operand")]
@@ -334,7 +335,7 @@
DONE;
})
-(define_insn_and_split "*mma_assemble_pair"
+(define_insn_and_split "*vsx_assemble_pair"
[(set (match_operand:OO 0 "vsx_register_operand" "=wa")
(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
@@ -351,7 +352,7 @@
DONE;
})
-(define_expand "mma_disassemble_pair"
+(define_expand "vsx_disassemble_pair"
[(match_operand:V16QI 0 "mma_disassemble_output_operand")
(match_operand:OO 1 "vsx_register_operand")
(match_operand 2 "const_0_to_1_operand")]
@@ -366,7 +367,7 @@
DONE;
})
-(define_insn_and_split "*mma_disassemble_pair"
+(define_insn_and_split "*vsx_disassemble_pair"
[(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
(unspec:V16QI [(match_operand:OO 1 "vsx_register_operand" "wa")
(match_operand 2 "const_0_to_1_operand")]
diff --git a/gcc/config/rs6000/pcrel-opt.md b/gcc/config/rs6000/pcrel-opt.md
new file mode 100644
index 0000000..9706a39
--- /dev/null
+++ b/gcc/config/rs6000/pcrel-opt.md
@@ -0,0 +1,401 @@
+;; Machine description for the PCREL_OPT optimization.
+;; Copyright (C) 2020-2021 Free Software Foundation, Inc.
+;; Contributed by Michael Meissner (meissner@linux.ibm.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Support for the PCREL_OPT optimization. PCREL_OPT looks for instances where
+;; an external variable is used only once, either for reading or for writing.
+;;
+;; If we are optimizing a single read, normally the code would look like:
+;;
+;; (set (reg:DI <ptr>)
+;; (symbol_ref:DI "<extern_addr>")) # <data> is currently dead
+;;
+;; ... # insns do not need to be adjacent
+;;
+;; (set (reg:SI <data>)
+;; (mem:SI (reg:DI <xxx>))) # <ptr> dies with this insn
+;;
+;; We transform this into:
+;;
+;; (parallel [(set (reg:DI <ptr>)
+;; (unspec:SI [(symbol_ref:DI <extern_addr>)
+;; (const_int <marker>)]
+;; UNSPEC_PCREL_OPT_LD_ADDR))
+;; (set (reg:DI <data>)
+;; (unspec:DI [(const_int 0)]
+;; UNSPEC_PCREL_OPT_LD_DATA))])
+;;
+;; ...
+;;
+;; (parallel [(set (reg:SI <data>)
+;; (unspec:SI [(mem:SI (reg:DI <ptr>))
+;; (reg:DI <data>)
+;; (const_int <marker>)]
+;; UNSPEC_PCREL_OPT_LD_RELOC))
+;; (clobber (reg:DI <ptr>))])
+;;
+;; The marker is an integer constant that links the load of the external
+;; address to the load of the actual variable.
+;;
+;; In the first insn, we set both the address of the external variable, and
+;; mark that the variable being loaded both are created in that insn, and are
+;; consumed in the second insn.  It doesn't matter what mode the register
+;; we will ultimately load into has, so we use DImode.  We just need to mark
+;; that both registers may be set in the first insn, and will be used in the
+;; second insn.
+;;
+;; Since we use UNSPEC's and link both the register holding the external
+;; address and the value being loaded, it should prevent other passes from
+;; modifying it.
+;;
+;; If the register being loaded is the same as the base register, we use an
+;; alternate form of the insns.
+;;
+;; (set (reg:DI <data_ptr>)
+;; (unspec:DI [(symbol_ref:DI <extern_addr>)
+;; (const_int <marker>)]
+;; UNSPEC_PCREL_OPT_LD_SAME_REG))
+;;
+;; ...
+;;
+;; (parallel [(set (reg:SI <data>)
+;; (unspec:SI [(mem:SI (reg:DI <ptr>))
+;; (reg:DI <data>)
+;; (const_int <marker>)]
+;; UNSPEC_PCREL_OPT_LD_RELOC))
+;; (clobber (reg:DI <ptr>))])
+
+(define_c_enum "unspec"
+ [UNSPEC_PCREL_OPT_LD_ADDR
+ UNSPEC_PCREL_OPT_LD_DATA
+ UNSPEC_PCREL_OPT_LD_SAME_REG
+ UNSPEC_PCREL_OPT_LD_RELOC
+ UNSPEC_PCREL_OPT_ST_ADDR
+ UNSPEC_PCREL_OPT_ST_RELOC])
+
+;; Modes that are supported for PCREL_OPT
+(define_mode_iterator PCRELOPT [QI HI SI DI TI SF DF KF
+ V1TI V2DI V4SI V8HI V16QI V2DF V4SF
+ (TF "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD")])
+
+;; Vector modes for PCREL_OPT
+(define_mode_iterator PCRELOPT_VECT [TI KF V1TI V2DI V4SI V8HI V16QI V2DF V4SF
+ (TF "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD")])
+
+;; Insn for loading the external address, where the register being loaded is not
+;; the same as the register being loaded with the data.
+(define_insn "pcrel_opt_ld_addr"
+ [(set (match_operand:DI 0 "base_reg_operand" "=&b,&b")
+ (unspec:DI [(match_operand:DI 1 "pcrel_external_address")
+ (match_operand 2 "const_int_operand" "n,n")]
+ UNSPEC_PCREL_OPT_LD_ADDR))
+ (set (match_operand:DI 3 "gpc_reg_operand" "=r,wa")
+ (unspec:DI [(const_int 0)]
+ UNSPEC_PCREL_OPT_LD_DATA))]
+ "TARGET_PCREL_OPT
+ && reg_or_subregno (operands[0]) != reg_or_subregno (operands[3])"
+ "ld %0,%a1\n.Lpcrel%2:"
+ [(set_attr "prefixed" "yes")
+ (set_attr "type" "load")
+ (set_attr "loads_external_address" "yes")])
+
+;; Alternate form of loading up the external address that is the same register
+;; as the final load.
+(define_insn "pcrel_opt_ld_addr_same_reg"
+ [(set (match_operand:DI 0 "base_reg_operand" "=b")
+ (unspec:DI [(match_operand:DI 1 "pcrel_external_address")
+ (match_operand 2 "const_int_operand" "n")]
+ UNSPEC_PCREL_OPT_LD_SAME_REG))]
+ "TARGET_PCREL_OPT"
+ "ld %0,%a1\n.Lpcrel%2:"
+ [(set_attr "prefixed" "yes")
+ (set_attr "type" "load")
+ (set_attr "loads_external_address" "yes")])
+
+;; PCREL_OPT modes that are optimized for loading or storing GPRs.
+(define_mode_iterator PCRELOPT_GPR [QI HI SI DI SF DF])
+
+(define_mode_attr PCRELOPT_GPR_LD [(QI "lbz")
+ (HI "lhz")
+ (SI "lwz")
+ (SF "lwz")
+ (DI "ld")
+ (DF "ld")])
+
+;; PCREL_OPT load operation of GPRs. Operand 4 (the register used to hold the
+;; address of the external symbol) is SCRATCH if the same register is used for
+;; the normal load.
+(define_insn "*pcrel_opt_ld<mode>_gpr"
+ [(parallel [(set (match_operand:PCRELOPT_GPR 0 "int_reg_operand" "+r")
+ (unspec:PCRELOPT_GPR [
+ (match_operand:PCRELOPT_GPR 1 "d_form_memory" "m")
+ (match_operand:DI 2 "int_reg_operand" "0")
+ (match_operand 3 "const_int_operand" "n")]
+ UNSPEC_PCREL_OPT_LD_RELOC))
+ (clobber (match_scratch:DI 4 "=bX"))])]
+ "TARGET_PCREL_OPT
+ && (GET_CODE (operands[4]) == SCRATCH
+ || reg_mentioned_p (operands[4], operands[1]))"
+{
+ output_pcrel_opt_reloc (operands[3]);
+ return "<PCRELOPT_GPR_LD> %0,%1";
+}
+ [(set_attr "type" "load")])
+
+;; PCREL_OPT load with sign/zero extension
+(define_insn "*pcrel_opt_ldsi_<u><mode>_gpr"
+ [(set (match_operand:EXTSI 0 "int_reg_operand" "+r")
+ (any_extend:EXTSI
+ (unspec:SI [(match_operand:SI 1 "d_form_memory" "m")
+ (match_operand:DI 2 "int_reg_operand" "0")
+ (match_operand 3 "const_int_operand" "n")]
+ UNSPEC_PCREL_OPT_LD_RELOC)))
+ (clobber (match_scratch:DI 4 "=bX"))]
+ "TARGET_PCREL_OPT"
+{
+ output_pcrel_opt_reloc (operands[3]);
+ return "lw<az> %0,%1";
+}
+ [(set_attr "type" "load")])
+
+(define_insn "*pcrel_opt_ldhi_<u><mode>_gpr"
+ [(set (match_operand:EXTHI 0 "int_reg_operand" "+r")
+ (any_extend:EXTHI
+ (unspec:HI [(match_operand:HI 1 "d_form_memory" "m")
+ (match_operand:DI 2 "int_reg_operand" "0")
+ (match_operand 3 "const_int_operand" "n")]
+ UNSPEC_PCREL_OPT_LD_RELOC)))
+ (clobber (match_scratch:DI 4 "=bX"))]
+ "TARGET_PCREL_OPT"
+{
+ output_pcrel_opt_reloc (operands[3]);
+ return "lh<az> %0,%1";
+}
+ [(set_attr "type" "load")])
+
+(define_insn "*pcrel_opt_ldqi_u<mode>_gpr"
+ [(set (match_operand:EXTQI 0 "int_reg_operand" "+r")
+ (zero_extend:EXTQI
+ (unspec:QI [(match_operand:QI 1 "d_form_memory" "m")
+ (match_operand:DI 2 "int_reg_operand" "0")
+ (match_operand 3 "const_int_operand" "n")]
+ UNSPEC_PCREL_OPT_LD_RELOC)))
+ (clobber (match_scratch:DI 4 "=bX"))]
+ "TARGET_PCREL_OPT"
+{
+ output_pcrel_opt_reloc (operands[3]);
+ return "lbz %0,%1";
+}
+ [(set_attr "type" "load")])
+
+;; Scalar types that can be optimized by loading them into floating point
+;; or Altivec registers.
+(define_mode_iterator PCRELOPT_FP [DI DF SF])
+
+;; Load instructions to load up scalar floating point or 64-bit integer values
+;; into floating point registers or Altivec registers.
+(define_mode_attr PCRELOPT_FPR_LD [(DI "lfd") (DF "lfd") (SF "lfs")])
+(define_mode_attr PCRELOPT_VMX_LD [(DI "lxsd") (DF "lxsd") (SF "lxssp")])
+
+;; PCREL_OPT load operation of scalar DF/DI/SF into vector registers.
+(define_insn "*pcrel_opt_ld<mode>_vsx"
+ [(set (match_operand:PCRELOPT_FP 0 "vsx_register_operand" "+d,v")
+ (unspec:PCRELOPT_FP [(match_operand:PCRELOPT_FP 1 "d_form_memory" "m,m")
+ (match_operand:DI 2 "vsx_register_operand" "0,0")
+ (match_operand 3 "const_int_operand" "n,n")]
+ UNSPEC_PCREL_OPT_LD_RELOC))
+ (clobber (match_operand:DI 4 "base_reg_operand" "=b,b"))]
+ "TARGET_PCREL_OPT"
+{
+ output_pcrel_opt_reloc (operands[3]);
+ return which_alternative ? "<PCRELOPT_VMX_LD> %0,%1"
+ : "<PCRELOPT_FPR_LD> %0,%1";
+}
+ [(set_attr "type" "fpload")])
+
+;; PCREL_OPT optimization extending SFmode to DFmode via a load.
+(define_insn "*pcrel_opt_ldsf_df"
+ [(set (match_operand:DF 0 "vsx_register_operand" "+d,v")
+ (float_extend:DF
+ (unspec:SF [(match_operand:SF 1 "d_form_memory" "m,m")
+ (match_operand:DI 2 "vsx_register_operand" "0,0")
+ (match_operand 3 "const_int_operand" "n,n")]
+ UNSPEC_PCREL_OPT_LD_RELOC)))
+ (clobber (match_operand:DI 4 "base_reg_operand" "=b,b"))]
+ "TARGET_PCREL_OPT"
+{
+ output_pcrel_opt_reloc (operands[3]);
+ return which_alternative ? "lxssp %0,%1" : "lfs %0,%1";
+}
+ [(set_attr "type" "fpload")])
+
+;; PCREL_OPT load operation of vector/float128 types into vector registers.
+(define_insn "*pcrel_opt_ld<mode>"
+ [(set (match_operand:PCRELOPT_VECT 0 "vsx_register_operand" "+wa")
+ (unspec:PCRELOPT_VECT [(match_operand:PCRELOPT_VECT 1 "d_form_memory" "m")
+ (match_operand:DI 2 "vsx_register_operand" "0")
+ (match_operand 3 "const_int_operand" "n")]
+ UNSPEC_PCREL_OPT_LD_RELOC))
+ (clobber (match_operand:DI 4 "base_reg_operand" "=b"))]
+ "TARGET_PCREL_OPT"
+{
+ output_pcrel_opt_reloc (operands[3]);
+ return "lxv %x0,%1";
+}
+ [(set_attr "type" "vecload")])
+
+
+;; PCREL_OPT optimization for stores. We need to put the label after the PLD
+;; instruction, because the assembler might insert a NOP before the PLD for
+;; alignment.
+;;
+;; If we are optimizing a single write, normally the code would look like:
+;;
+;; (set (reg:DI <ptr>)
+;; (symbol_ref:DI "<extern_addr>")) # <data> must be live here
+;;
+;; ... # insns do not need to be adjacent
+;;
+;; (set (mem:SI (reg:DI <xxx>))
+;; (reg:SI <data>)) # <ptr> dies with this insn
+;;
+;; We optimize this to be:
+;;
+;; (parallel [(set (reg:DI <ptr>)
+;; (unspec:DI [(symbol_ref:DI "<extern_addr>")
+;; (const_int <marker>)]
+;; UNSPEC_PCREL_OPT_ST_ADDR))
+;; (use (reg:<MODE> <data>))])
+;;
+;; ... # insns do not need to be adjacent
+;;
+;; (parallel [(set (mem:<MODE> (reg:DI <ptr>))
+;; (unspec:<MODE> [(reg:<MODE> <data>)
+;; (const_int <marker>)]
+;; UNSPEC_PCREL_OPT_ST_RELOC))
+;; (clobber (reg:DI <ptr>))])
+
+(define_insn "*pcrel_opt_st_addr<mode>"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=b")
+ (unspec:DI [(match_operand:DI 1 "pcrel_external_address")
+ (match_operand 2 "const_int_operand" "n")]
+ UNSPEC_PCREL_OPT_ST_ADDR))
+ (use (match_operand:PCRELOPT 3 "gpc_reg_operand" "rwa"))]
+ "TARGET_PCREL_OPT"
+ "ld %0,%a1\n.Lpcrel%2:"
+ [(set_attr "prefixed" "yes")
+ (set_attr "type" "load")
+ (set_attr "loads_external_address" "yes")])
+
+;; PCREL_OPT stores.
+(define_insn "*pcrel_opt_st<mode>"
+ [(set (match_operand:QHSI 0 "d_form_memory" "=m")
+ (unspec:QHSI [(match_operand:QHSI 1 "gpc_reg_operand" "r")
+ (match_operand 2 "const_int_operand" "n")]
+ UNSPEC_PCREL_OPT_ST_RELOC))
+ (clobber (match_operand:DI 3 "base_reg_operand" "=b"))]
+ "TARGET_PCREL_OPT"
+{
+ output_pcrel_opt_reloc (operands[2]);
+ return "st<wd> %1,%0";
+}
+ [(set_attr "type" "store")])
+
+(define_insn "*pcrel_opt_stdi"
+ [(set (match_operand:DI 0 "d_form_memory" "=m,m,m")
+ (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r,d,v")
+ (match_operand 2 "const_int_operand" "n,n,n")]
+ UNSPEC_PCREL_OPT_ST_RELOC))
+ (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))]
+ "TARGET_PCREL_OPT && TARGET_POWERPC64"
+{
+ output_pcrel_opt_reloc (operands[2]);
+ switch (which_alternative)
+ {
+ case 0:
+ return "std %1,%0";
+ case 1:
+ return "stfd %1,%0";
+ case 2:
+ return "stxsd %1,%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "store,fpstore,fpstore")])
+
+(define_insn "*pcrel_opt_stsf"
+ [(set (match_operand:SF 0 "d_form_memory" "=m,m,m")
+ (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "d,v,r")
+ (match_operand 2 "const_int_operand" "n,n,n")]
+ UNSPEC_PCREL_OPT_ST_RELOC))
+ (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))]
+ "TARGET_PCREL_OPT"
+{
+ output_pcrel_opt_reloc (operands[2]);
+ switch (which_alternative)
+ {
+ case 0:
+ return "stfs %1,%0";
+ case 1:
+ return "stxssp %1,%0";
+ case 2:
+ return "stw %1,%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fpstore,fpstore,store")])
+
+(define_insn "*pcrel_opt_stdf"
+ [(set (match_operand:DF 0 "d_form_memory" "=m,m,m")
+ (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d,v,r")
+ (match_operand 2 "const_int_operand" "n,n,n")]
+ UNSPEC_PCREL_OPT_ST_RELOC))
+ (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))]
+ "TARGET_PCREL_OPT
+ && (TARGET_POWERPC64 || vsx_register_operand (operands[1], DFmode))"
+{
+ output_pcrel_opt_reloc (operands[2]);
+ switch (which_alternative)
+ {
+ case 0:
+ return "stfd %1,%0";
+ case 1:
+ return "stxsd %1,%0";
+ case 2:
+ return "std %1,%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fpstore,fpstore,store")])
+
+(define_insn "*pcrel_opt_st<mode>"
+ [(set (match_operand:PCRELOPT_VECT 0 "d_form_memory" "=m")
+ (unspec:PCRELOPT_VECT [(match_operand:PCRELOPT_VECT 1 "gpc_reg_operand" "wa")
+ (match_operand 2 "const_int_operand" "n")]
+ UNSPEC_PCREL_OPT_ST_RELOC))
+ (clobber (match_operand:DI 3 "base_reg_operand" "=b"))]
+ "TARGET_PCREL_OPT"
+{
+ output_pcrel_opt_reloc (operands[2]);
+ return "stxv %x1,%0";
+}
+ [(set_attr "type" "vecstore")])
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index bd26c62..69f3c70 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -992,6 +992,20 @@
return INTVAL (offset) % 4 == 0;
})
+;; Return 1 if the operand is a memory operand that has a valid address for
+;; a DS-form instruction. I.e. the address has to be either just a register,
+;; or register + const where the two low order bits of const are zero.
+(define_predicate "ds_form_mem_operand"
+ (match_code "subreg,mem")
+{
+ if (!any_memory_operand (op, mode))
+ return false;
+
+ rtx addr = XEXP (op, 0);
+
+ return address_to_insn_form (addr, mode, NON_PREFIXED_DS) == INSN_FORM_DS;
+})
+
;; Return 1 if the operand, used inside a MEM, is a SYMBOL_REF.
(define_predicate "symbol_ref_operand"
(and (match_code "symbol_ref")
@@ -1904,3 +1918,24 @@
{
return address_is_prefixed (XEXP (op, 0), mode, NON_PREFIXED_DEFAULT);
})
+
+;; Return true if the operand is a valid memory operand with a D-form
+;; address that could be merged with the load of a PC-relative external address
+;; with the PCREL_OPT optimization. We don't check here whether or not the
+;; offset needs to be used in a DS-FORM (bottom 2 bits 0) or DQ-FORM (bottom 4
+;; bits 0) instruction.
+(define_predicate "d_form_memory"
+ (match_code "mem")
+{
+ if (!memory_operand (op, mode))
+ return false;
+
+ rtx addr = XEXP (op, 0);
+
+ if (REG_P (addr))
+ return true;
+ if (SUBREG_P (addr) && REG_P (SUBREG_REG (addr)))
+ return true;
+
+ return !indexed_address (addr, mode);
+})
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 058a32a..609bebd 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -43,6 +43,10 @@
ATTR builtin attribute information.
ICODE Insn code of the function that implements the builtin. */
+#ifndef RS6000_BUILTIN_COMPAT
+ #undef BU_COMPAT
+ #define BU_COMPAT(ENUM, COMPAT_NAME)
+
#ifndef RS6000_BUILTIN_0
#error "RS6000_BUILTIN_0 is not defined."
#endif
@@ -87,6 +91,36 @@
#error "RS6000_BUILTIN_X is not defined."
#endif
+#else
+ /* Compatibility builtins. These builtins are simply mapped into
+ their compatible builtin function identified by ENUM. */
+ #undef BU_COMPAT
+ #define BU_COMPAT(ENUM, COMPAT_NAME) { ENUM, "__builtin_" COMPAT_NAME },
+
+ #undef RS6000_BUILTIN_0
+ #undef RS6000_BUILTIN_1
+ #undef RS6000_BUILTIN_2
+ #undef RS6000_BUILTIN_3
+ #undef RS6000_BUILTIN_4
+ #undef RS6000_BUILTIN_A
+ #undef RS6000_BUILTIN_D
+ #undef RS6000_BUILTIN_H
+ #undef RS6000_BUILTIN_M
+ #undef RS6000_BUILTIN_P
+ #undef RS6000_BUILTIN_X
+ #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
+ #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
+ #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
+ #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
+ #define RS6000_BUILTIN_4(ENUM, NAME, MASK, ATTR, ICODE)
+ #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
+ #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
+ #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
+ #define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE)
+ #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
+ #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
+#endif
+
#ifndef BU_AV_1
/* Define convenience macros using token pasting to allow fitting everything in
one line. */
@@ -368,6 +402,23 @@
| RS6000_BTC_BINARY), \
CODE_FOR_ ## ICODE) /* ICODE */
+/* Like BU_MMA_2, but uses "vsx" rather than "mma" naming. */
+#define BU_MMA_V2(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_M (VSX_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_vsx_" NAME, /* NAME */ \
+ RS6000_BTM_MMA, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_BINARY \
+ | RS6000_BTC_VOID \
+ | RS6000_BTC_GIMPLE), \
+ CODE_FOR_nothing) /* ICODE */ \
+ RS6000_BUILTIN_M (VSX_BUILTIN_ ## ENUM ## _INTERNAL, /* ENUM */ \
+ "__builtin_vsx_" NAME "_internal", /* NAME */ \
+ RS6000_BTM_MMA, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_BINARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
#define BU_MMA_3(ENUM, NAME, ATTR, ICODE) \
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM, /* ENUM */ \
"__builtin_mma_" NAME, /* NAME */ \
@@ -384,6 +435,23 @@
| RS6000_BTC_TERNARY), \
CODE_FOR_ ## ICODE) /* ICODE */
+/* Like BU_MMA_3, but uses "vsx" rather than "mma" naming. */
+#define BU_MMA_V3(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_M (VSX_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_vsx_" NAME, /* NAME */ \
+ RS6000_BTM_MMA, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_TERNARY \
+ | RS6000_BTC_VOID \
+ | RS6000_BTC_GIMPLE), \
+ CODE_FOR_nothing) /* ICODE */ \
+ RS6000_BUILTIN_M (VSX_BUILTIN_ ## ENUM ## _INTERNAL, /* ENUM */ \
+ "__builtin_vsx_" NAME "_internal", /* NAME */ \
+ RS6000_BTM_MMA, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_TERNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
#define BU_MMA_5(ENUM, NAME, ATTR, ICODE) \
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM, /* ENUM */ \
"__builtin_mma_" NAME, /* NAME */ \
@@ -3136,9 +3204,11 @@ BU_MMA_1 (XXMTACC, "xxmtacc", QUAD, mma_xxmtacc)
BU_MMA_1 (XXSETACCZ, "xxsetaccz", MISC, mma_xxsetaccz)
BU_MMA_2 (DISASSEMBLE_ACC, "disassemble_acc", QUAD, mma_disassemble_acc)
-BU_MMA_2 (DISASSEMBLE_PAIR,"disassemble_pair", PAIR, mma_disassemble_pair)
+BU_MMA_V2 (DISASSEMBLE_PAIR, "disassemble_pair", PAIR, vsx_disassemble_pair)
+BU_COMPAT (VSX_BUILTIN_DISASSEMBLE_PAIR, "mma_disassemble_pair")
-BU_MMA_3 (ASSEMBLE_PAIR, "assemble_pair", MISC, mma_assemble_pair)
+BU_MMA_V3 (ASSEMBLE_PAIR, "assemble_pair", MISC, vsx_assemble_pair)
+BU_COMPAT (VSX_BUILTIN_ASSEMBLE_PAIR, "mma_assemble_pair")
BU_MMA_3 (XVBF16GER2, "xvbf16ger2", MISC, mma_xvbf16ger2)
BU_MMA_3 (XVF16GER2, "xvf16ger2", MISC, mma_xvf16ger2)
BU_MMA_3 (XVF32GER, "xvf32ger", MISC, mma_xvf32ger)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index de0ce50..f567625 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -89,6 +89,12 @@
#define TARGET_NO_PROTOTYPE 0
#endif
+struct builtin_compatibility
+{
+ const enum rs6000_builtins code;
+ const char *const name;
+};
+
struct builtin_description
{
const HOST_WIDE_INT mask;
@@ -8839,6 +8845,13 @@ def_builtin (const char *name, tree type, enum rs6000_builtins code)
(int)code, name, attr_string);
}
+static const struct builtin_compatibility bdesc_compat[] =
+{
+#define RS6000_BUILTIN_COMPAT
+#include "rs6000-builtin.def"
+};
+#undef RS6000_BUILTIN_COMPAT
+
/* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
#undef RS6000_BUILTIN_0
@@ -10115,7 +10128,7 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
unsigned attr_args = attr & RS6000_BTC_OPND_MASK;
if (attr & RS6000_BTC_QUAD
- || fcode == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
+ || fcode == VSX_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
attr_args++;
gcc_assert (nopnds == attr_args);
@@ -11730,7 +11743,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
tree new_decl;
if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC
- || fncode == MMA_BUILTIN_DISASSEMBLE_PAIR)
+ || fncode == VSX_BUILTIN_DISASSEMBLE_PAIR)
{
/* This is an MMA disassemble built-in function. */
push_gimplify_context (true);
@@ -11745,7 +11758,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
another accumulator/pair, then just copy the entire thing as is. */
if ((fncode == MMA_BUILTIN_DISASSEMBLE_ACC
&& TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
- || (fncode == MMA_BUILTIN_DISASSEMBLE_PAIR
+ || (fncode == VSX_BUILTIN_DISASSEMBLE_PAIR
&& TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_pair_type_node))
{
tree dst = build_simple_mem_ref (build1 (VIEW_CONVERT_EXPR,
@@ -11847,7 +11860,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
gcc_unreachable ();
}
- if (fncode == MMA_BUILTIN_ASSEMBLE_PAIR)
+ if (fncode == VSX_BUILTIN_ASSEMBLE_PAIR)
lhs = make_ssa_name (vector_pair_type_node);
else
lhs = make_ssa_name (vector_quad_type_node);
@@ -13447,6 +13460,18 @@ rs6000_init_builtins (void)
#ifdef SUBTARGET_INIT_BUILTINS
SUBTARGET_INIT_BUILTINS;
#endif
+
+ /* Register the compatibility builtins after all of the normal
+ builtins have been defined. */
+ const struct builtin_compatibility *d = bdesc_compat;
+ unsigned i;
+ for (i = 0; i < ARRAY_SIZE (bdesc_compat); i++, d++)
+ {
+ tree decl = rs6000_builtin_decls[(int)d->code];
+ if (decl != NULL)
+ add_builtin_function (d->name, TREE_TYPE (decl), (int)d->code,
+ BUILT_IN_MD, NULL, NULL_TREE);
+ }
}
/* Returns the rs6000 builtin decl for CODE. */
@@ -14119,7 +14144,7 @@ mma_init_builtins (void)
else
{
if (!(d->code == MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL
- || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
+ || d->code == VSX_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
&& (attr & RS6000_BTC_QUAD) == 0)
attr_args--;
@@ -14129,7 +14154,7 @@ mma_init_builtins (void)
/* This is a disassemble pair/acc function. */
if (d->code == MMA_BUILTIN_DISASSEMBLE_ACC
- || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR)
+ || d->code == VSX_BUILTIN_DISASSEMBLE_PAIR)
{
op[nopnds++] = build_pointer_type (void_type_node);
if (d->code == MMA_BUILTIN_DISASSEMBLE_ACC)
@@ -14143,7 +14168,7 @@ mma_init_builtins (void)
unsigned j = 0;
if (attr & RS6000_BTC_QUAD
&& d->code != MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL
- && d->code != MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
+ && d->code != VSX_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
j = 1;
for (; j < (unsigned) insn_data[icode].n_operands; j++)
{
@@ -14151,7 +14176,7 @@ mma_init_builtins (void)
if (gimple_func && mode == XOmode)
op[nopnds++] = build_pointer_type (vector_quad_type_node);
else if (gimple_func && mode == OOmode
- && d->code == MMA_BUILTIN_ASSEMBLE_PAIR)
+ && d->code == VSX_BUILTIN_ASSEMBLE_PAIR)
op[nopnds++] = build_pointer_type (vector_pair_type_node);
else
/* MMA uses unsigned types. */
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index f0cf79e..cbbb42c 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -77,6 +77,7 @@
/* Flags that need to be turned off if -mno-power10. */
#define OTHER_POWER10_MASKS (OPTION_MASK_MMA \
| OPTION_MASK_PCREL \
+ | OPTION_MASK_PCREL_OPT \
| OPTION_MASK_PREFIXED)
#define ISA_3_1_MASKS_SERVER (ISA_3_0_MASKS_SERVER \
@@ -147,6 +148,7 @@
| OPTION_MASK_P9_MISC \
| OPTION_MASK_P9_VECTOR \
| OPTION_MASK_PCREL \
+ | OPTION_MASK_PCREL_OPT \
| OPTION_MASK_POPCNTB \
| OPTION_MASK_POPCNTD \
| OPTION_MASK_POWERPC64 \
diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
index 606ad3e..c8e46ba 100644
--- a/gcc/config/rs6000/rs6000-passes.def
+++ b/gcc/config/rs6000/rs6000-passes.def
@@ -24,4 +24,12 @@ along with GCC; see the file COPYING3. If not see
REPLACE_PASS (PASS, INSTANCE, TGT_PASS)
*/
+ /* Pass to add the appropriate vector swaps on power8 little endian systems.
+ The power8 does not have instructions that automatically do the byte swaps
+ for loads and stores. */
INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
+
+ /* Pass to do the PCREL_OPT optimization that combines the load of an
+ external symbol's address along with a single load or store using that
+ address as a base register. */
+ INSERT_PASS_BEFORE (pass_sched2, 1, pass_pcrel_opt);
diff --git a/gcc/config/rs6000/rs6000-pcrel-opt.c b/gcc/config/rs6000/rs6000-pcrel-opt.c
new file mode 100644
index 0000000..32275aa
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-pcrel-opt.c
@@ -0,0 +1,910 @@
+/* Subroutines used to support the pc-relative linker optimization.
+ Copyright (C) 2020-2021 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file implements a RTL pass that looks for pc-relative loads of the
+ address of an external variable using the PCREL_GOT relocation and a single
+ load that uses that external address. If that is found we create the
+ PCREL_OPT relocation to possibly convert:
+
+ pld addr_reg,var@pcrel@got
+
+ <possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+ lwz data_reg,0(addr_reg)
+
+ into:
+
+ plwz data_reg,var@pcrel
+
+ <possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+ nop
+
+ Of course it would be nice to be able to put the plwz in this example in
+ place of the lwz but the linker cannot easily replace a 4-byte instruction
+ with an 8-byte one.
+
+ If the variable is not defined in the main program or the code using it is
+ not in the main program, the linker puts the address in the .got section and
+ generates:
+
+ .section .got
+ .Lvar_got:
+ .dword var
+
+ At the point where it is referenced, we have:
+
+ .section .text
+ pld addr_reg,.Lvar_got@pcrel
+
+ <possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+ lwz data_reg,0(addr_reg)
+
+ We look for a single usage in the basic block where this external
+ address is loaded, and convert it to a PCREL_OPT relocation so the
+ linker can convert it to a single plwz in this case. Multiple uses
+ or references in another basic block will force us to not use the
+ PCREL_OPT relocation.
+
+ We also optimize stores to the address of an external variable using the
+ PCREL_GOT relocation and a single store that uses that external address. If
+ that is found we create the PCREL_OPT relocation to possibly convert:
+
+ pld addr_reg,var@pcrel@got
+
+ <possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+ stw data_reg,0(addr_reg)
+
+ into:
+
+ pstw data_reg,var@pcrel
+
+ <possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+ nop
+
+ If the variable is not defined in the main program or the code using it is
+ not in the main program, the linker puts the address in the .got section and
+ generates:
+
+ .section .got
+ .Lvar_got:
+ .dword var
+
+ And at our point of reference we have:
+
+ .section .text
+ pld addr_reg,.Lvar_got@pcrel
+
+ <possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+ stw data_reg,0(addr_reg)
+
+ We only look for a single usage in the basic block where the external
+ address is loaded. Multiple uses or references in another basic block will
+ force us to not use the PCREL_OPT relocation. */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "memmodel.h"
+#include "expmed.h"
+#include "optabs.h"
+#include "recog.h"
+#include "df.h"
+#include "tm_p.h"
+#include "ira.h"
+#include "print-tree.h"
+#include "varasm.h"
+#include "explow.h"
+#include "expr.h"
+#include "output.h"
+#include "tree-pass.h"
+#include "rtx-vector-builder.h"
+#include "print-rtl.h"
+#include "insn-attr.h"
+#include "insn-codes.h"
+
+/* Various counters. */
+static struct {
+ unsigned long extern_addrs;
+ unsigned long loads;
+ unsigned long adjacent_loads;
+ unsigned long failed_loads;
+ unsigned long stores;
+ unsigned long adjacent_stores;
+ unsigned long failed_stores;
+} counters;
+
+/* Unique integer that is appended to .Lpcrel to make a pcrel_opt label. */
+static unsigned int pcrel_opt_next_num;
+
+
+/* Optimize a PC-relative load address to be used in a load. Before it calls
+ this function, pcrel_opt_address () uses DF to make sure that it is safe
+ to do the PCREL_OPT optimization on these insns.
+
+ Convert insns of the form:
+
+ (set (reg:DI addr)
+ (symbol_ref:DI "ext_symbol"))
+
+ ...
+
+ (set (reg:<MODE> value)
+ (mem:<MODE> (reg:DI addr)))
+
+ into:
+
+ (parallel [(set (reg:DI addr)
+ (unspec:<MODE> [(symbol_ref:DI "ext_symbol")
+ (const_int label_num)]
+ UNSPEC_PCREL_OPT_LD_ADDR))
+ (set (reg:DI data)
+ (unspec:DI [(const_int 0)]
+ UNSPEC_PCREL_OPT_LD_DATA))])
+
+ ...
+
+ (parallel [(set (reg:<MODE>)
+ (unspec:<MODE> [(mem:<MODE> (reg:DI addr))
+ (reg:DI data)
+ (const_int label_num)]
+ UNSPEC_PCREL_OPT_LD_RELOC))
+ (clobber (reg:DI addr))])
+
+ Because PCREL_OPT will move the actual location of the load from the second
+ insn to the first, we need to have the register for the load data be live
+ starting at the first insn.
+
+ If the destination register for the data being loaded is the same register
+ used to hold the extern address, we generate this insn instead:
+
+ (set (reg:DI data)
+ (unspec:DI [(symbol_ref:DI "ext_symbol")
+ (const_int label_num)]
+ UNSPEC_PCREL_OPT_LD_SAME_REG))
+
+ In the first insn, we set both the address of the external variable, and mark
+ that the variable being loaded both are created in that insn, and are
+ consumed in the second insn. The mode used in the first insn for the data
+ register that will be loaded in the second insn doesn't matter in the end so
+ we use DImode. We just need to mark that both registers may be set in the
+ first insn, and will be used in the second insn.
+
+ The UNSPEC_PCREL_OPT_LD_ADDR insn will generate the load address plus
+ a definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_LD_RELOC
+ insn will generate the .reloc to tell the linker to tie the load address and
+ load using that address together.
+
+ pld b,ext_symbol@got@pcrel
+ .Lpcrel1:
+
+ ...
+
+ .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+ lwz r,0(b)
+
+ If ext_symbol is defined in another object file in the main program and we
+ are linking the main program, the linker will convert the above instructions
+ to:
+
+ plwz r,ext_symbol@got@pcrel
+
+ ...
+
+ nop
+
+ ADDR_INSN is the insn that is loading the address.
+ LOAD_INSN is the insn that uses the address to load the actual data. */
+
+static void
+pcrel_opt_load (rtx_insn *addr_insn, rtx_insn *load_insn)
+{
+ rtx addr_set = PATTERN (addr_insn);
+ gcc_assert (GET_CODE (addr_set) == SET);
+
+ rtx addr_reg = SET_DEST (addr_set);
+ gcc_assert (base_reg_operand (addr_reg, Pmode));
+
+ rtx addr_symbol = SET_SRC (addr_set);
+ gcc_assert (pcrel_external_address (addr_symbol, Pmode));
+
+ rtx load_set = PATTERN (load_insn);
+ gcc_assert (GET_CODE (load_set) == SET);
+
+ /* Make sure there are no references to the register being loaded
+ between the two insns. */
+ rtx reg = SET_DEST (load_set);
+ if (reg_used_between_p (reg, addr_insn, load_insn)
+ || reg_set_between_p (reg, addr_insn, load_insn))
+ return;
+
+ rtx mem = SET_SRC (load_set);
+ machine_mode reg_mode = GET_MODE (reg);
+ machine_mode mem_mode = GET_MODE (mem);
+ rtx mem_inner = mem;
+ unsigned int reg_regno = reg_or_subregno (reg);
+
+ /* Handle the fact that LWA is a DS format instruction, but LWZ is a D format
+ instruction. If the mem load is a signed SImode (i.e. LWA would be used)
+ we set mem_mode to DImode so that pcrel_opt_valid_mem_p() will check that
+ the address will work for a DS-form instruction. If it won't work, we skip
+ the optimization. The float loads are all indexed so there are no problems
+ there. */
+
+ if (GET_CODE (mem) == SIGN_EXTEND && GET_MODE (XEXP (mem, 0)) == SImode)
+ {
+ if (!INT_REGNO_P (reg_regno))
+ return;
+
+ mem_inner = XEXP (mem, 0);
+ mem_mode = DImode;
+ }
+
+ else if (GET_CODE (mem) == SIGN_EXTEND
+ || GET_CODE (mem) == ZERO_EXTEND
+ || GET_CODE (mem) == FLOAT_EXTEND)
+ {
+ mem_inner = XEXP (mem, 0);
+ mem_mode = GET_MODE (mem_inner);
+ }
+
+ if (!MEM_P (mem_inner))
+ return;
+
+ /* Can we do PCREL_OPT for this reference? */
+ if (!pcrel_opt_valid_mem_p (reg, mem_mode, mem_inner))
+ return;
+
+ /* Allocate a new PC-relative label, and update the load external address
+ insn.
+
+ If the register being loaded is different from the address register, we
+ need to indicate both registers are set at the load of the address.
+
+ (parallel [(set (reg load)
+ (unspec [(symbol_ref addr_symbol)
+ (const_int label_num)]
+ UNSPEC_PCREL_OPT_LD_ADDR))
+ (set (reg addr)
+ (unspec [(const_int 0)]
+ UNSPEC_PCREL_OPT_LD_DATA))])
+
+ If the register being loaded is the same as the address register, we use
+ an alternate form:
+
+ (set (reg load)
+ (unspec [(symbol_ref addr_symbol)
+ (const_int label_num)]
+ UNSPEC_PCREL_OPT_LD_SAME_REG)) */
+ unsigned int addr_regno = reg_or_subregno (addr_reg);
+ rtx label_num = GEN_INT (++pcrel_opt_next_num);
+ rtx reg_di = gen_rtx_REG (DImode, reg_regno);
+ rtx addr_pattern;
+
+ /* Create the load address, either using the pattern with an explicit clobber
+ if the address register is not the same as the register being loaded, or
+ using the pattern that requires the address register to be the address
+ loaded. */
+ if (addr_regno != reg_regno)
+ addr_pattern = gen_pcrel_opt_ld_addr (addr_reg, addr_symbol, label_num,
+ reg_di);
+ else
+ addr_pattern = gen_pcrel_opt_ld_addr_same_reg (addr_reg, addr_symbol,
+ label_num);
+
+ validate_change (addr_insn, &PATTERN (addr_insn), addr_pattern, false);
+
+ /* Update the load insn. If the mem had a sign/zero/float extend, add that
+ also after doing the UNSPEC. Add an explicit clobber of the external
+ address register just to make it clear that the address register dies.
+
+ (parallel [(set (reg:<MODE> data)
+ (unspec:<MODE> [(mem (addr_reg)
+ (reg:DI data)
+ (const_int label_num)]
+ UNSPEC_PCREL_OPT_LD_RELOC))
+ (clobber (reg:DI addr_reg))]) */
+ rtvec v_load = gen_rtvec (3, mem_inner, reg_di, label_num);
+ rtx new_load = gen_rtx_UNSPEC (GET_MODE (mem_inner), v_load,
+ UNSPEC_PCREL_OPT_LD_RELOC);
+
+ if (GET_CODE (mem) != GET_CODE (mem_inner))
+ new_load = gen_rtx_fmt_e (GET_CODE (mem), reg_mode, new_load);
+
+ rtx new_load_set = gen_rtx_SET (reg, new_load);
+ rtx load_clobber = gen_rtx_CLOBBER (VOIDmode,
+ (addr_regno == reg_regno
+ ? gen_rtx_SCRATCH (Pmode)
+ : addr_reg));
+ rtx new_load_pattern
+ = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_load_set, load_clobber));
+
+ validate_change (load_insn, &PATTERN (load_insn), new_load_pattern, false);
+
+ /* Attempt to apply the changes: */
+ if (!apply_change_group ())
+ {
+ /* PCREL_OPT load optimization did not succeed. */
+ counters.failed_loads++;
+ if (dump_file)
+ fprintf (dump_file,
+ "PCREL_OPT load failed (addr insn = %d, use insn = %d).\n",
+ INSN_UID (addr_insn),
+ INSN_UID (load_insn));
+ return;
+ }
+
+ /* PCREL_OPT load optimization succeeded. */
+ counters.loads++;
+ if (next_nonnote_insn (addr_insn) == load_insn)
+ counters.adjacent_loads++;
+
+ if (dump_file)
+ fprintf (dump_file,
+ "PCREL_OPT load (addr insn = %d, use insn = %d).\n",
+ INSN_UID (addr_insn),
+ INSN_UID (load_insn));
+
+ /* Because we have set DF_DEFER_INSN_RESCAN, we have to explicitly do it
+ after we have made changes to the insns. */
+ df_analyze ();
+
+}
+
+/* Optimize a PC-relative load address to be used in a store. Before calling
+ this function, pcrel_opt_address () uses DF to make sure it is safe to do
+ the PCREL_OPT optimization.
+
+ Convert insns of the form:
+
+ (set (reg:DI addr)
+ (symbol_ref:DI "ext_symbol"))
+
+ ...
+
+ (set (mem:<MODE> (reg:DI addr))
+ (reg:<MODE> value))
+
+ into:
+
+ (parallel [(set (reg:DI addr)
+ (unspec:DI [(symbol_ref:DI "ext_symbol")
+ (const_int label_num)]
+ UNSPEC_PCREL_OPT_ST_ADDR))
+ (use (reg:<MODE> value))])
+
+ ...
+
+ (parallel [(set (mem:<MODE> (reg:DI addr))
+ (unspec:<MODE> [(reg:<MODE>)
+ (const_int label_num)]
+ UNSPEC_PCREL_OPT_ST_RELOC))
+ (clobber (reg:DI addr))])
+
+ The UNSPEC_PCREL_OPT_ST_ADDR insn will generate the load address plus a
+ definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_ST_RELOC insn
+ will generate the .reloc to tell the linker to tie the load address and store
+ using that address together.
+
+ pld b,ext_symbol@got@pcrel
+ .Lpcrel1:
+
+ ...
+
+ .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+ stw r,0(b)
+
+ If ext_symbol is defined in another object file in the main program and we
+ are linking the main program, the linker will convert the above instructions
+ to:
+
+ pstw r,ext_symbol@got@pcrel
+
+ ...
+
+ nop */
+
+static void
+pcrel_opt_store (rtx_insn *addr_insn, /* insn loading address. */
+ rtx_insn *store_insn) /* insn using address. */
+{
+ rtx addr_old_set = PATTERN (addr_insn);
+ gcc_assert (GET_CODE (addr_old_set) == SET);
+
+ rtx addr_reg = SET_DEST (addr_old_set);
+ gcc_assert (base_reg_operand (addr_reg, Pmode));
+
+ rtx addr_symbol = SET_SRC (addr_old_set);
+ gcc_assert (pcrel_external_address (addr_symbol, Pmode));
+
+ rtx store_set = PATTERN (store_insn);
+ gcc_assert (GET_CODE (store_set) == SET);
+
+ rtx mem = SET_DEST (store_set);
+ if (!MEM_P (mem))
+ return;
+
+ machine_mode mem_mode = GET_MODE (mem);
+ rtx reg = SET_SRC (store_set);
+
+ /* Don't allow storing the address of the external variable. */
+ if (reg_or_subregno (reg) == reg_or_subregno (addr_reg))
+ return;
+
+ /* Can we do PCREL_OPT for this reference? */
+ if (!pcrel_opt_valid_mem_p (reg, mem_mode, mem))
+ return;
+
+ /* Allocate a new PC-relative label, and update the load address insn.
+
+ (parallel [(set (reg addr)
+ (unspec [(symbol_ref symbol)
+ (const_int label_num)]
+ UNSPEC_PCREL_OPT_ST_ADDR))
+ (use (reg store))])
+ */
+ rtx label_num = GEN_INT (++pcrel_opt_next_num);
+ rtvec v_addr = gen_rtvec (2, addr_symbol, label_num);
+ rtx addr_unspec = gen_rtx_UNSPEC (Pmode, v_addr,
+ UNSPEC_PCREL_OPT_ST_ADDR);
+ rtx addr_new_set = gen_rtx_SET (addr_reg, addr_unspec);
+ rtx addr_use = gen_rtx_USE (VOIDmode, reg);
+ rtx addr_new_pattern
+ = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, addr_new_set, addr_use));
+
+ validate_change (addr_insn, &PATTERN (addr_insn), addr_new_pattern, false);
+
+ /* Update the store insn. Add an explicit clobber of the external address
+ register just to be sure there are no additional uses of the address
+ register.
+
+ (parallel [(set (mem (addr_reg)
+ (unspec:<MODE> [(reg)
+ (const_int label_num)]
+ UNSPEC_PCREL_OPT_ST_RELOC))
+ (clobber (reg:DI addr_reg))]) */
+ rtvec v_store = gen_rtvec (2, reg, label_num);
+ rtx new_store = gen_rtx_UNSPEC (mem_mode, v_store,
+ UNSPEC_PCREL_OPT_ST_RELOC);
+
+ rtx new_store_set = gen_rtx_SET (mem, new_store);
+ rtx store_clobber = gen_rtx_CLOBBER (VOIDmode, addr_reg);
+ rtx new_store_pattern
+ = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_store_set, store_clobber));
+
+ validate_change (store_insn, &PATTERN (store_insn), new_store_pattern, false);
+
+ /* Attempt to apply the changes: */
+ if (!apply_change_group ())
+ {
+ /* PCREL_OPT store failed. */
+ counters.failed_stores++;
+ if (dump_file)
+ fprintf (dump_file,
+ "PCREL_OPT store failed (addr insn = %d, use insn = %d).\n",
+ INSN_UID (addr_insn),
+ INSN_UID (store_insn));
+ return;
+ }
+
+ /* PCREL_OPT store succeeded. */
+ counters.stores++;
+ if (next_nonnote_insn (addr_insn) == store_insn)
+ counters.adjacent_stores++;
+
+ if (dump_file)
+ fprintf (dump_file,
+ "PCREL_OPT store (addr insn = %d, use insn = %d).\n",
+ INSN_UID (addr_insn),
+ INSN_UID (store_insn));
+
+ /* Because we have set DF_DEFER_INSN_RESCAN, we have to explicitly do it
+ after we have made changes to the insns. */
+ df_analyze();
+
+}
+
+/* Return the register used as the base register of MEM, if the instruction has
+ a pc-relative form. We look for BSWAP to rule out LFIWAX/LFIWZX/STFIWX, and
+ ROTATE/VEC_SELECT are RTX_EXTRA not RTX_UNARY which rules out lxvd2x. This
+ excludes instructions that do not have a pc-relative form. */
+
+static rtx
+get_mem_base_reg (rtx mem)
+{
+ const char * fmt;
+
+ while (!MEM_P (mem))
+ {
+ if (GET_RTX_CLASS (GET_CODE (mem)) != RTX_UNARY
+ || GET_CODE (mem) == BSWAP)
+ return NULL_RTX;
+ fmt = GET_RTX_FORMAT (GET_CODE (mem));
+ if (fmt[0] != 'e')
+ return NULL_RTX;
+ mem = XEXP (mem, 0);
+ if (mem == NULL_RTX )
+ return NULL_RTX;
+ }
+
+ if (!MEM_SIZE_KNOWN_P (mem))
+ return NULL_RTX;
+
+ rtx addr_rtx = (XEXP (mem, 0));
+ if (GET_CODE (addr_rtx) == PRE_MODIFY)
+ addr_rtx = XEXP (addr_rtx, 1);
+
+ while (GET_CODE (addr_rtx) == PLUS
+ && CONST_INT_P (XEXP (addr_rtx, 1)))
+ addr_rtx = XEXP (addr_rtx, 0);
+
+ if (!REG_P (addr_rtx))
+ return NULL_RTX;
+
+ return addr_rtx;
+}
+
+/* Check whether INSN contains a reference to REGNO that will inhibit the
+ PCREL_OPT optimization. If TYPE is a load or store instruction, return true
+ if there is a definition of REGNO. If TYPE is a load instruction, then
+ return true if there is a use of REGNO. */
+
+static bool
+insn_references_regno_p (rtx_insn *insn, unsigned int regno,
+ enum attr_type type)
+{
+ struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+ df_ref ref;
+
+ /* Return true if there is a definition of REGNO. */
+ for (ref = DF_INSN_INFO_DEFS (insn_info); ref; ref = DF_REF_NEXT_LOC (ref))
+ if (DF_REF_REGNO (ref) == regno)
+ return true;
+
+ /* If type is a load, return true if there is a use of REGNO. */
+ if (type == TYPE_LOAD
+ || type == TYPE_FPLOAD
+ || type == TYPE_VECLOAD)
+ for (ref = DF_INSN_INFO_USES (insn_info); ref; ref = DF_REF_NEXT_LOC (ref))
+ if (DF_REF_REGNO (ref) == regno)
+ return true;
+
+ return false;
+}
+
+/* Given an insn that loads up a base register with the address of an
+ external symbol, see if we can optimize it with the PCREL_OPT
+ optimization.
+
+ DF is used to make sure that there is exactly one definition and one
+ non-debug use of the address register defined by the insn. The use insn must
+ be a non-prefix insn, and must also be in the same basic block as the address
+ insn.
+
+ ADDR_INSN is the insn that loads the external symbol address. */
+
+static void
+pcrel_opt_address (rtx_insn *addr_insn)
+{
+ counters.extern_addrs++;
+
+ /* Do some basic validation. */
+ rtx addr_set = PATTERN (addr_insn);
+ if (GET_CODE (addr_set) != SET)
+ return;
+
+ rtx addr_reg = SET_DEST (addr_set);
+ rtx addr_symbol = SET_SRC (addr_set);
+
+ if (!base_reg_operand (addr_reg, Pmode)
+ || !pcrel_external_address (addr_symbol, Pmode))
+ return;
+
+ /* The address register must have exactly one definition. */
+ struct df_insn_info *insn_info = DF_INSN_INFO_GET (addr_insn);
+ if (!insn_info)
+ return;
+
+ df_ref def = df_single_def (insn_info);
+ if (!def)
+ return;
+
+ /* Make sure there is at least one use. */
+ df_link *chain = DF_REF_CHAIN (def);
+ if (!chain || !chain->ref)
+ return;
+
+ /* Get the insn of the possible load or store. */
+ rtx_insn *use_insn = DF_REF_INSN (chain->ref);
+
+ /* Ensure there are no other uses. */
+ for (chain = chain->next; chain; chain = chain->next)
+ if (chain->ref && DF_REF_INSN_INFO (chain->ref))
+ {
+ gcc_assert (DF_REF_INSN (chain->ref));
+ if (NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
+ return;
+ }
+
+ /* The use instruction must be a single non-prefixed instruction. */
+ if (get_attr_length (use_insn) != 4)
+ return;
+
+ /* The address and the memory operation must be in the same basic block. */
+ if (BLOCK_FOR_INSN (use_insn) != BLOCK_FOR_INSN (addr_insn))
+ return;
+
+ /* If this isn't a simple SET, skip doing the optimization. */
+ if (GET_CODE (PATTERN (use_insn)) != SET)
+ return;
+
+ enum attr_type use_insn_type = get_attr_type (use_insn);
+ unsigned int use_regno;
+
+ /* Make sure the use_insn is using addr_reg as its base register
+ for the load or store, and determine the regno for the register
+ used in the use_insn. */
+ rtx use_dest, use_src;
+ switch (use_insn_type)
+ {
+ case TYPE_LOAD:
+ case TYPE_FPLOAD:
+ case TYPE_VECLOAD:
+ /* Make sure our address register is the same register used in the
+ base address of the load. */
+ if (addr_reg != get_mem_base_reg (SET_SRC (PATTERN (use_insn))))
+ return;
+ /* Make sure we are setting a register before we look at REGNO. */
+ use_dest = SET_DEST (PATTERN (use_insn));
+ if (!register_operand (use_dest, GET_MODE (use_dest)))
+ return;
+ use_regno = REGNO (use_dest);
+ break;
+ case TYPE_STORE:
+ case TYPE_FPSTORE:
+ case TYPE_VECSTORE:
+ /* Make sure our address register is the same register used in the
+ base address of the store. */
+ if (addr_reg != get_mem_base_reg (SET_DEST (PATTERN (use_insn))))
+ return;
+ /* Make sure this is a register before we look at REGNO. */
+ use_src = SET_SRC (PATTERN (use_insn));
+ if (!register_operand (use_src, GET_MODE (use_src)))
+ return;
+ use_regno = REGNO (use_src);
+ break;
+ default:
+ /* We can only optimize loads and stores. Ignore everything else. */
+ return;
+ }
+
+ rtx_insn *insn;
+ for (insn = NEXT_INSN (addr_insn);
+ insn != use_insn;
+ insn = NEXT_INSN (insn))
+ {
+ /* If we see a call, do not do the PCREL_OPT optimization. */
+ if (CALL_P (insn))
+ return;
+
+ /* Skip debug insns. */
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ /* See if it is a load or store. */
+ if (GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER)
+ {
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LOAD:
+ /* While load of the external address is a 'load' for scheduling
+ purposes, it should be safe to allow loading other external
+ addresses between the load of the external address we are
+ currently looking at and the load or store using that
+ address. */
+ if (get_attr_loads_external_address (insn)
+ == LOADS_EXTERNAL_ADDRESS_YES)
+ break;
+ /* fall through */
+
+ case TYPE_FPLOAD:
+ case TYPE_VECLOAD:
+ /* Don't do the PCREL_OPT store optimization if there is a load
+ operation. For example, the load might be trying to load the
+ value being stored in between getting the address and doing
+ the store. */
+ if (use_insn_type == TYPE_STORE
+ || use_insn_type == TYPE_FPSTORE
+ || use_insn_type == TYPE_VECSTORE)
+ return;
+ break;
+
+ case TYPE_STORE:
+ case TYPE_FPSTORE:
+ case TYPE_VECSTORE:
+ /* Don't do the PCREL_OPT load optimization if there is a store
+ operation. Perhaps the store might be to the global variable
+ through a pointer. */
+ return;
+
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_HTM:
+ case TYPE_HTMSIMPLE:
+ /* Don't do the optimization through atomic operations. */
+ return;
+
+ default:
+ break;
+ }
+ }
+
+ /* Check for invalid references of the non-address register that is
+ used in the load or store instruction. */
+ if (insn_references_regno_p (insn, use_regno, use_insn_type))
+ return;
+ }
+
+ /* Is this a load or a store? */
+ switch (use_insn_type)
+ {
+ case TYPE_LOAD:
+ case TYPE_FPLOAD:
+ case TYPE_VECLOAD:
+ pcrel_opt_load (addr_insn, use_insn);
+ break;
+
+ case TYPE_STORE:
+ case TYPE_FPSTORE:
+ case TYPE_VECSTORE:
+ pcrel_opt_store (addr_insn, use_insn);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Optimize pcrel external variable references. */
+
+static unsigned int
+pcrel_opt_pass (function *fun)
+{
+ basic_block bb;
+ rtx_insn *insn, *curr_insn = 0;
+
+ memset (&counters, 0, sizeof (counters));
+
+ /* Dataflow analysis for use-def chains. However we have to specify both UD
+ and DU as otherwise when we make changes to insns for the PCREL_OPT there
+ will be dangling references. */
+ df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+ df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
+ df_note_add_problem ();
+ df_analyze ();
+
+ /* Set the defer flag as our pattern of operation will be to modify two insns,
+ then call df_analyze (). */
+ df_set_flags (DF_DEFER_INSN_RESCAN | DF_LR_RUN_DCE);
+
+ if (dump_file)
+ fprintf (dump_file, "\n");
+
+ /* Look at each basic block to see if there is a load of an external
+ variable's external address, and a single load/store using that external
+ address. */
+ FOR_ALL_BB_FN (bb, fun)
+ {
+ FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
+ {
+ if (NONJUMP_INSN_P (insn)
+ && single_set (insn)
+ && get_attr_loads_external_address (insn)
+ == LOADS_EXTERNAL_ADDRESS_YES)
+ pcrel_opt_address (insn);
+ }
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "\n# of loads of an address of an external symbol = %lu\n",
+ counters.extern_addrs);
+
+ fprintf (dump_file, "# of PCREL_OPT loads = %lu (adjacent %lu)\n",
+ counters.loads, counters.adjacent_loads);
+
+ if (counters.failed_loads)
+ fprintf (dump_file, "# of failed PCREL_OPT loads = %lu\n",
+ counters.failed_loads);
+
+ fprintf (dump_file, "# of PCREL_OPT stores = %lu (adjacent %lu)\n",
+ counters.stores, counters.adjacent_stores);
+
+ if (counters.failed_stores)
+ fprintf (dump_file, "# of failed PCREL_OPT stores = %lu\n",
+ counters.failed_stores);
+
+ fprintf (dump_file, "\n");
+ }
+
+ df_remove_problem (df_chain);
+ df_process_deferred_rescans ();
+ df_set_flags (DF_RD_PRUNE_DEAD_DEFS | DF_LR_RUN_DCE);
+ df_analyze ();
+ return 0;
+}
+
+/* Optimize pc-relative references for the new PCREL_OPT pass. */
+const pass_data pass_data_pcrel_opt =
+{
+ RTL_PASS, /* type. */
+ "pcrel_opt", /* name. */
+ OPTGROUP_NONE, /* optinfo_flags. */
+ TV_NONE, /* tv_id. */
+ 0, /* properties_required. */
+ 0, /* properties_provided. */
+ 0, /* properties_destroyed. */
+ 0, /* todo_flags_start. */
+ TODO_df_finish, /* todo_flags_finish. */
+};
+
+/* Pass data structures. */
+class pcrel_opt : public rtl_opt_pass
+{
+public:
+ pcrel_opt (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_pcrel_opt, ctxt)
+ {}
+
+ ~pcrel_opt (void)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return (TARGET_PCREL && TARGET_PCREL_OPT && optimize);
+ }
+
+ virtual unsigned int execute (function *fun)
+ {
+ return pcrel_opt_pass (fun);
+ }
+
+ opt_pass *clone ()
+ {
+ return new pcrel_opt (m_ctxt);
+ }
+};
+
+rtl_opt_pass *
+make_pass_pcrel_opt (gcc::context *ctxt)
+{
+ return new pcrel_opt (ctxt);
+}
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index d9d44fe..203660b 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -193,10 +193,13 @@ extern enum insn_form address_to_insn_form (rtx, machine_mode,
enum non_prefixed_form);
extern bool address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
enum non_prefixed_form non_prefix_format);
+extern bool pcrel_opt_valid_mem_p (rtx, machine_mode, rtx);
+enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
extern bool prefixed_load_p (rtx_insn *);
extern bool prefixed_store_p (rtx_insn *);
extern bool prefixed_paddi_p (rtx_insn *);
extern void rs6000_asm_output_opcode (FILE *);
+extern void output_pcrel_opt_reloc (rtx);
extern void rs6000_final_prescan_insn (rtx_insn *, rtx [], int);
extern int rs6000_adjust_insn_length (rtx_insn *, int);
@@ -309,6 +312,7 @@ namespace gcc { class context; }
class rtl_opt_pass;
extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
+extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
extern bool rs6000_quadword_masked_address_p (const_rtx exp);
extern rtx rs6000_gen_lvx (enum machine_mode, rtx, rtx);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index ec068c5..46ddf49 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1173,7 +1173,6 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
machine_mode,
secondary_reload_info *,
bool);
-static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries. */
@@ -3413,9 +3412,10 @@ rs6000_builtin_mask_calculate (void)
not such a great idea. */
static rtx_insn *
-rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
- vec<const char *> &/*constraints*/,
- vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
+rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
+ vec<machine_mode> & /*input_modes*/,
+ vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
+ HARD_REG_SET &clobbered_regs)
{
clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
@@ -4452,6 +4452,9 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_isa_flags &= ~OPTION_MASK_MMA;
}
+ if (!TARGET_PCREL && TARGET_PCREL_OPT)
+ rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
+
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
@@ -7852,7 +7855,8 @@ rs6000_special_round_type_align (tree type, unsigned int computed,
while (TREE_CODE (type) == ARRAY_TYPE)
type = TREE_TYPE (type);
- if (type != error_mark_node && TYPE_MODE (type) == DFmode)
+ if (type != error_mark_node
+ && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
align = MAX (align, 64);
}
@@ -8985,8 +8989,57 @@ rs6000_delegitimize_address (rtx orig_x)
{
rtx x, y, offset;
- if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
- orig_x = XVECEXP (orig_x, 0, 0);
+ /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
+ encodes loading up the high part of the address of a TOC reference along
+ with a load of a GPR using the same base register used for the load. We
+ return the original SYMBOL_REF.
+
+ (set (reg:INT1 <reg>
+ (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
+
+ UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
+ UNSPECs include the external SYMBOL_REF along with the value being loaded.
+ We return the original SYMBOL_REF.
+
+ (parallel [(set (reg:DI <base-reg>)
+ (unspec:DI [(symbol_ref <symbol>)
+ (const_int <marker>)]
+ UNSPEC_PCREL_OPT_LD_ADDR))
+ (set (reg:DI <load-reg>)
+ (unspec:DI [(const_int 0)]
+ UNSPEC_PCREL_OPT_LD_DATA))])
+
+ UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
+ GPR being loaded is the same as the GPR used to hold the external address.
+
+ (set (reg:DI <base-reg>)
+ (unspec:DI [(symbol_ref <symbol>)
+ (const_int <marker>)]
+ UNSPEC_PCREL_OPT_LD_SAME_REG))
+
+ UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
+ UNSPEC includes the external SYMBOL_REF along with the value being stored.
+ We return the original SYMBOL_REF.
+
+ (parallel [(set (reg:DI <base-reg>)
+ (unspec:DI [(symbol_ref <symbol>)
+ (const_int <marker>)]
+ UNSPEC_PCREL_OPT_ST_ADDR))
+ (use (reg <store-reg>))]) */
+
+ if (GET_CODE (orig_x) == UNSPEC)
+ switch (XINT (orig_x, 1))
+ {
+ case UNSPEC_FUSION_GPR:
+ case UNSPEC_PCREL_OPT_LD_ADDR:
+ case UNSPEC_PCREL_OPT_LD_SAME_REG:
+ case UNSPEC_PCREL_OPT_ST_ADDR:
+ orig_x = XVECEXP (orig_x, 0, 0);
+ break;
+
+ default:
+ break;
+ }
orig_x = delegitimize_mem_from_attrs (orig_x);
@@ -10059,6 +10112,9 @@ rs6000_const_vec (machine_mode mode)
void
rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
{
+ gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
+ gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
+
/* Scalar permutations are easier to express in integer modes rather than
floating-point modes, so cast them here. We use V1TImode instead
of TImode to ensure that the values don't go through GPRs. */
@@ -21205,8 +21261,6 @@ rs6000_xcoff_file_start (void)
main_input_filename, ".ro_");
rs6000_gen_section_name (&xcoff_tls_data_section_name,
main_input_filename, ".tls_");
- rs6000_gen_section_name (&xcoff_tbss_section_name,
- main_input_filename, ".tbss_[UL]");
fputs ("\t.file\t", asm_out_file);
output_quoted_string (asm_out_file, main_input_filename);
@@ -23788,6 +23842,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "mulhw", OPTION_MASK_MULHW, false, true },
{ "multiple", OPTION_MASK_MULTIPLE, false, true },
{ "pcrel", OPTION_MASK_PCREL, false, true },
+ { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
{ "popcntb", OPTION_MASK_POPCNTB, false, true },
{ "popcntd", OPTION_MASK_POPCNTD, false, true },
{ "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
@@ -25932,6 +25987,32 @@ address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
return false;
}
+/* Return true if a REG with a given MODE, loaded from or stored into a MEM
+ location uses a non-prefixed D/DS/DQ-form address. This is used to validate
+ the load or store with the PCREL_OPT optimization to make sure it is an
+ instruction that can be optimized.
+
+ We need to specify the MODE separately from the REG to allow for loads that
+ include zero/sign/float extension. */
+
+bool
+pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
+{
+ /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
+ PCREL_OPT optimization. */
+ enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
+ if (non_prefixed == NON_PREFIXED_X)
+ return false;
+
+ /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
+ rtx addr = XEXP (mem, 0);
+ enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
+ return (iform == INSN_FORM_BASE_REG
+ || iform == INSN_FORM_D
+ || iform == INSN_FORM_DS
+ || iform == INSN_FORM_DQ);
+}
+
/* Helper function to see if we're potentially looking at lfs/stfs.
- PARALLEL containing a SET and a CLOBBER
- stfs:
@@ -25990,7 +26071,7 @@ is_lfs_stfs_insn (rtx_insn *insn)
/* Helper function to take a REG and a MODE and turn it into the non-prefixed
instruction format (D/DS/DQ) used for offset memory. */
-static enum non_prefixed_form
+enum non_prefixed_form
reg_to_non_prefixed (rtx reg, machine_mode mode)
{
/* If it isn't a register, use the defaults. */
@@ -26191,7 +26272,7 @@ prefixed_paddi_p (rtx_insn *insn)
/* Whether the next instruction needs a 'p' prefix issued before the
instruction is printed out. */
-static bool next_insn_prefixed_p;
+static bool prepend_p_to_next_insn;
/* Define FINAL_PRESCAN_INSN if some processing needs to be done before
outputting the assembler code. On the PowerPC, we remember if the current
@@ -26202,7 +26283,7 @@ static bool next_insn_prefixed_p;
void
rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
{
- next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
+ prepend_p_to_next_insn = (get_attr_prefixed (insn) != PREFIXED_NO);
return;
}
@@ -26212,12 +26293,35 @@ rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
void
rs6000_asm_output_opcode (FILE *stream)
{
- if (next_insn_prefixed_p)
- fprintf (stream, "p");
+ if (prepend_p_to_next_insn)
+ {
+ fprintf (stream, "p");
+
+ /* Reset the flag in the case where there are separate insn lines in the
+ sequence, so the 'p' is only emitted for the first line. This shows up
+ when we are doing the PCREL_OPT optimization, in that the label created
+ with %r<n> would have a leading 'p' printed. */
+ prepend_p_to_next_insn = false;
+ }
return;
}
+/* Emit the relocation to tie the next instruction to a previous instruction
+ that loads up an external address. This is used to do the PCREL_OPT
+ optimization. Note, the label is generated after the PLD of the got
+ pc-relative address to allow for the assembler to insert NOPs before the PLD
+ instruction. The operand is a constant integer that is the label
+ number. */
+
+void
+output_pcrel_opt_reloc (rtx label_num)
+{
+ rtx operands[1] = { label_num };
+ output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
+ operands);
+}
+
/* Adjust the length of an INSN. LENGTH is the currently-computed length and
should be adjusted to reflect any required changes. This macro is used when
there is some systematic length adjustment required that would be difficult
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index a131552..c0d7b1a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -209,7 +209,7 @@
;; What data size does this instruction work on?
;; This is used for insert, mul and others as necessary.
-(define_attr "size" "8,16,32,64,128" (const_string "32"))
+(define_attr "size" "8,16,32,64,128,256" (const_string "32"))
;; What is the insn_cost for this insn? The target hook can still override
;; this. For optimizing for size the "length" attribute is used instead.
@@ -292,6 +292,10 @@
(const_string "no")))
+;; Whether an insn loads an external address for the PCREL_OPT optimization.
+(define_attr "loads_external_address" "no,yes"
+ (const_string "no"))
+
;; Return the number of real hardware instructions in a combined insn. If it
;; is 0, just use the length / 4.
(define_attr "num_insns" "" (const_int 0))
@@ -671,6 +675,7 @@
;; How many bits (per element) in this mode?
(define_mode_attr bits [(QI "8") (HI "16") (SI "32") (DI "64")
(SF "32") (DF "64")
+ (DD "64") (TD "128")
(V4SI "32") (V2DI "64")])
; DImode bits
@@ -4068,7 +4073,7 @@
[(set_attr "type" "insert")])
; There are also some forms without one of the ANDs.
-(define_insn "*rotl<mode>3_insert_3"
+(define_insn "rotl<mode>3_insert_3"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(ior:GPR (and:GPR (match_operand:GPR 3 "gpc_reg_operand" "0")
(match_operand:GPR 4 "const_int_operand" "n"))
@@ -4083,6 +4088,24 @@
}
[(set_attr "type" "insert")])
+(define_code_iterator plus_ior_xor [plus ior xor])
+
+(define_split
+ [(set (match_operand:GPR 0 "gpc_reg_operand")
+ (plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
+ (match_operand:SI 2 "const_int_operand"))
+ (match_operand:GPR 3 "gpc_reg_operand")))]
+ "nonzero_bits (operands[3], <MODE>mode)
+ < HOST_WIDE_INT_1U << INTVAL (operands[2])"
+ [(set (match_dup 0)
+ (ior:GPR (and:GPR (match_dup 3)
+ (match_dup 4))
+ (ashift:GPR (match_dup 1)
+ (match_dup 2))))]
+{
+ operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1);
+})
+
(define_insn "*rotl<mode>3_insert_4"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(ior:GPR (and:GPR (match_operand:GPR 3 "gpc_reg_operand" "0")
@@ -9991,7 +10014,7 @@
(unspec:SI [(const_int 0)] UNSPEC_TLSTLS))
(clobber (reg:SI LR_REGNO))]
"TARGET_XCOFF && HAVE_AS_TLS"
- "bla __get_tpointer")
+ "bla .__get_tpointer")
(define_expand "tls_get_addr<mode>"
[(set (match_operand:P 0 "gpc_reg_operand")
@@ -10016,7 +10039,7 @@
(clobber (reg:CC CR0_REGNO))
(clobber (reg:P LR_REGNO))]
"TARGET_XCOFF && HAVE_AS_TLS"
- "bla __tls_get_addr")
+ "bla .__tls_get_addr")
;; Next come insns related to the calling sequence.
;;
@@ -10243,7 +10266,8 @@
"TARGET_PCREL"
"ld %0,%a1"
[(set_attr "prefixed" "yes")
- (set_attr "type" "load")])
+ (set_attr "type" "load")
+ (set_attr "loads_external_address" "yes")])
;; TOC register handling.
@@ -14928,3 +14952,4 @@
(include "crypto.md")
(include "htm.md")
(include "fusion.md")
+(include "pcrel-opt.md")
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index ae9e91e..0dbdf75 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -609,6 +609,10 @@ mpcrel
Target Mask(PCREL) Var(rs6000_isa_flags)
Generate (do not generate) pc-relative memory addressing.
+mpcrel-opt
+Target Undocumented Mask(PCREL_OPT) Var(rs6000_isa_flags)
+Generate (do not generate) pc-relative memory optimizations for externals.
+
mmma
Target Mask(MMA) Var(rs6000_isa_flags)
Generate (do not generate) MMA instructions.
diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md
index 11e4c03..40629dd 100644
--- a/gcc/config/rs6000/sync.md
+++ b/gcc/config/rs6000/sync.md
@@ -131,6 +131,7 @@
&& !reg_mentioned_p (operands[0], operands[1])"
"lq %0,%1"
[(set_attr "type" "load")
+ (set_attr "size" "128")
(set (attr "prefixed") (if_then_else (match_test "TARGET_PREFIXED")
(const_string "yes")
(const_string "no")))])
@@ -205,6 +206,7 @@
"TARGET_SYNC_TI"
"stq %1,%0"
[(set_attr "type" "store")
+ (set_attr "size" "128")
(set (attr "prefixed") (if_then_else (match_test "TARGET_PREFIXED")
(const_string "yes")
(const_string "no")))])
@@ -333,7 +335,8 @@
&& !reg_mentioned_p (operands[0], operands[1])
&& quad_int_reg_operand (operands[0], PTImode)"
"lqarx %0,%y1"
- [(set_attr "type" "load_l")])
+ [(set_attr "type" "load_l")
+ (set_attr "size" "128")])
(define_insn "store_conditional<mode>"
[(set (match_operand:CC 0 "cc_reg_operand" "=x")
@@ -394,7 +397,8 @@
(match_operand:PTI 2 "quad_int_reg_operand" "r"))]
"TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)"
"stqcx. %2,%y1"
- [(set_attr "type" "store_c")])
+ [(set_attr "type" "store_c")
+ (set_attr "size" "128")])
(define_expand "atomic_compare_and_swap<mode>"
[(match_operand:SI 0 "int_reg_operand") ;; bool out
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1541a65..44f7ffb 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -23,6 +23,10 @@ TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def
TM_H += $(srcdir)/config/rs6000/rs6000-modes.h
PASSES_EXTRA += $(srcdir)/config/rs6000/rs6000-passes.def
+rs6000-pcrel-opt.o: $(srcdir)/config/rs6000/rs6000-pcrel-opt.c
+ $(COMPILE) $<
+ $(POSTCOMPILE)
+
rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c
$(COMPILE) $<
$(POSTCOMPILE)
@@ -90,4 +94,5 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \
$(srcdir)/config/rs6000/crypto.md \
$(srcdir)/config/rs6000/htm.md \
$(srcdir)/config/rs6000/dfp.md \
- $(srcdir)/config/rs6000/fusion.md
+ $(srcdir)/config/rs6000/fusion.md \
+ $(srcdir)/config/rs6000/pcrel-opt.md
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 3e05186..a1fa4f9 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -987,11 +987,13 @@
(define_insn_and_split "*vsx_le_perm_load_<mode>"
[(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
(match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
+ && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
"@
#
#"
- "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
+ && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
[(const_int 0)]
{
rtx tmp = (can_create_pseudo_p ()
@@ -1008,7 +1010,8 @@
(define_insn "*vsx_le_perm_store_<mode>"
[(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
(match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
+ && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
"@
#
#"
@@ -1019,7 +1022,8 @@
(define_split
[(set (match_operand:VSX_LE_128 0 "memory_operand")
(match_operand:VSX_LE_128 1 "vsx_register_operand"))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR
+ && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
[(const_int 0)]
{
rtx tmp = (can_create_pseudo_p ()
@@ -1075,7 +1079,8 @@
(define_split
[(set (match_operand:VSX_LE_128 0 "memory_operand")
(match_operand:VSX_LE_128 1 "vsx_register_operand"))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR
+ && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
[(const_int 0)]
{
rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
@@ -1241,7 +1246,8 @@
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
/* Expand to swaps if needed, prior to swap optimization. */
- if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
+ if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
+ && !altivec_indexed_or_indirect_operand(operands[1], <MODE>mode))
{
rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
DONE;
@@ -1254,7 +1260,8 @@
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
/* Expand to swaps if needed, prior to swap optimization. */
- if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
+ if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
+ && !altivec_indexed_or_indirect_operand(operands[0], <MODE>mode))
{
rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
DONE;
@@ -3030,28 +3037,22 @@
(use (match_operand:SI 4 "gpc_reg_operand"))]
"VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
{
- rtx a = gen_reg_rtx (DImode);
- rtx b = gen_reg_rtx (DImode);
- rtx c = gen_reg_rtx (DImode);
- rtx d = gen_reg_rtx (DImode);
- emit_insn (gen_zero_extendsidi2 (a, operands[1]));
- emit_insn (gen_zero_extendsidi2 (b, operands[2]));
- emit_insn (gen_zero_extendsidi2 (c, operands[3]));
- emit_insn (gen_zero_extendsidi2 (d, operands[4]));
+ rtx a = gen_lowpart_SUBREG (DImode, operands[1]);
+ rtx b = gen_lowpart_SUBREG (DImode, operands[2]);
+ rtx c = gen_lowpart_SUBREG (DImode, operands[3]);
+ rtx d = gen_lowpart_SUBREG (DImode, operands[4]);
if (!BYTES_BIG_ENDIAN)
{
std::swap (a, b);
std::swap (c, d);
}
- rtx aa = gen_reg_rtx (DImode);
rtx ab = gen_reg_rtx (DImode);
- rtx cc = gen_reg_rtx (DImode);
rtx cd = gen_reg_rtx (DImode);
- emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
- emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
- emit_insn (gen_iordi3 (ab, aa, b));
- emit_insn (gen_iordi3 (cd, cc, d));
+ emit_insn (gen_rotldi3_insert_3 (ab, a, GEN_INT (32), b,
+ GEN_INT (0xffffffff)));
+ emit_insn (gen_rotldi3_insert_3 (cd, c, GEN_INT (32), d,
+ GEN_INT (0xffffffff)));
rtx abcd = gen_reg_rtx (V2DImode);
emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
diff --git a/gcc/config/rs6000/xcoff.h b/gcc/config/rs6000/xcoff.h
index c016678..cb9aae7 100644
--- a/gcc/config/rs6000/xcoff.h
+++ b/gcc/config/rs6000/xcoff.h
@@ -255,11 +255,11 @@
} while (0)
#ifdef HAVE_AS_TLS
-#define ASM_OUTPUT_TLS_COMMON(FILE, DECL, NAME, SIZE) \
- do { fputs (COMMON_ASM_OP, (FILE)); \
- RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \
- fprintf ((FILE), "[UL]," HOST_WIDE_INT_PRINT_UNSIGNED"\n", \
- (SIZE)); \
+#define ASM_OUTPUT_TLS_COMMON(FILE, DECL, NAME, SIZE) \
+ do { fputs (LOCAL_COMMON_ASM_OP, (FILE)); \
+ fprintf ((FILE), "%s," HOST_WIDE_INT_PRINT_UNSIGNED",%s[UL],3\n", \
+ (*targetm.strip_name_encoding) (NAME), (SIZE), \
+ (*targetm.strip_name_encoding) (NAME)); \
} while (0)
#endif
diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h
index 8e23e31..4078440 100644
--- a/gcc/config/rx/rx.h
+++ b/gcc/config/rx/rx.h
@@ -629,6 +629,9 @@ typedef unsigned int CUMULATIVE_ARGS;
#define PREFERRED_DEBUGGING_TYPE (TARGET_AS100_SYNTAX \
? DBX_DEBUG : DWARF2_DEBUG)
+#define DBX_DEBUGGING_INFO 1
+#define DWARF2_DEBUGGING_INFO 1
+
#define INCOMING_FRAME_SP_OFFSET 4
#define ARG_POINTER_CFA_OFFSET(FNDECL) 4
diff --git a/gcc/config/s390/driver-native.c b/gcc/config/s390/driver-native.c
index 4a065a5..c024715 100644
--- a/gcc/config/s390/driver-native.c
+++ b/gcc/config/s390/driver-native.c
@@ -124,7 +124,7 @@ s390_host_detect_local_cpu (int argc, const char **argv)
cpu = "z15";
break;
default:
- cpu = "z15";
+ cpu = "arch14";
break;
}
}
diff --git a/gcc/config/s390/s390-builtin-types.def b/gcc/config/s390/s390-builtin-types.def
index a2b7d4a..52ef572 100644
--- a/gcc/config/s390/s390-builtin-types.def
+++ b/gcc/config/s390/s390-builtin-types.def
@@ -267,6 +267,7 @@ DEF_FN_TYPE_2 (BT_FN_V2DI_V4SI_V4SI, BT_V2DI, BT_V4SI, BT_V4SI)
DEF_FN_TYPE_2 (BT_FN_V4SF_FLT_INT, BT_V4SF, BT_FLT, BT_INT)
DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_UCHAR, BT_V4SF, BT_V4SF, BT_UCHAR)
DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_V4SF, BT_V4SF, BT_V4SF, BT_V4SF)
+DEF_FN_TYPE_2 (BT_FN_V4SF_V8HI_UINT, BT_V4SF, BT_V8HI, BT_UINT)
DEF_FN_TYPE_2 (BT_FN_V4SI_BV4SI_V4SI, BT_V4SI, BT_BV4SI, BT_V4SI)
DEF_FN_TYPE_2 (BT_FN_V4SI_INT_VOIDCONSTPTR, BT_V4SI, BT_INT, BT_VOIDCONSTPTR)
DEF_FN_TYPE_2 (BT_FN_V4SI_UV4SI_UV4SI, BT_V4SI, BT_UV4SI, BT_UV4SI)
@@ -278,6 +279,7 @@ DEF_FN_TYPE_2 (BT_FN_V8HI_BV8HI_V8HI, BT_V8HI, BT_BV8HI, BT_V8HI)
DEF_FN_TYPE_2 (BT_FN_V8HI_UV8HI_UV8HI, BT_V8HI, BT_UV8HI, BT_UV8HI)
DEF_FN_TYPE_2 (BT_FN_V8HI_V16QI_V16QI, BT_V8HI, BT_V16QI, BT_V16QI)
DEF_FN_TYPE_2 (BT_FN_V8HI_V4SI_V4SI, BT_V8HI, BT_V4SI, BT_V4SI)
+DEF_FN_TYPE_2 (BT_FN_V8HI_V8HI_UINT, BT_V8HI, BT_V8HI, BT_UINT)
DEF_FN_TYPE_2 (BT_FN_V8HI_V8HI_V8HI, BT_V8HI, BT_V8HI, BT_V8HI)
DEF_FN_TYPE_2 (BT_FN_VOID_UINT64PTR_UINT64, BT_VOID, BT_UINT64PTR, BT_UINT64)
DEF_FN_TYPE_2 (BT_FN_VOID_V2DF_FLTPTR, BT_VOID, BT_V2DF, BT_FLTPTR)
@@ -345,6 +347,7 @@ DEF_FN_TYPE_3 (BT_FN_V4SI_V4SI_V4SI_V4SI, BT_V4SI, BT_V4SI, BT_V4SI, BT_V4SI)
DEF_FN_TYPE_3 (BT_FN_V4SI_V8HI_V8HI_V4SI, BT_V4SI, BT_V8HI, BT_V8HI, BT_V4SI)
DEF_FN_TYPE_3 (BT_FN_V8HI_UV8HI_UV8HI_INTPTR, BT_V8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR)
DEF_FN_TYPE_3 (BT_FN_V8HI_V16QI_V16QI_V8HI, BT_V8HI, BT_V16QI, BT_V16QI, BT_V8HI)
+DEF_FN_TYPE_3 (BT_FN_V8HI_V4SF_V4SF_UINT, BT_V8HI, BT_V4SF, BT_V4SF, BT_UINT)
DEF_FN_TYPE_3 (BT_FN_V8HI_V4SI_V4SI_INTPTR, BT_V8HI, BT_V4SI, BT_V4SI, BT_INTPTR)
DEF_FN_TYPE_3 (BT_FN_V8HI_V8HI_V8HI_INTPTR, BT_V8HI, BT_V8HI, BT_V8HI, BT_INTPTR)
DEF_FN_TYPE_3 (BT_FN_V8HI_V8HI_V8HI_V8HI, BT_V8HI, BT_V8HI, BT_V8HI, BT_V8HI)
diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
index deb205b..129d712 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -273,6 +273,7 @@
#undef B_VXE
#undef B_VXE2
#undef B_DEP
+#undef B_NNPA
#undef BFLAGS_MASK_INIT
#define BFLAGS_MASK_INIT (B_INT)
@@ -283,6 +284,7 @@
#define B_VXE (1 << 3) /* Builtins requiring the z14 vector extensions. */
#define B_VXE2 (1 << 4) /* Builtins requiring the z15 vector extensions. */
#define B_DEP (1 << 5) /* Builtin has been deprecated and a warning should be issued. */
+#define B_NNPA (1 << 6) /* Builtins requiring the NNPA Facility. */
/* B_DEF defines a standard (not overloaded) builtin
B_DEF (<builtin name>, <RTL expander name>, <function attributes>, <builtin flags>, <operand flags, see above>, <fntype>)
@@ -3005,3 +3007,13 @@ OB_DEF_VAR (s390_vstrsz_u32, s390_vstrszf, 0,
B_DEF (s390_vstrszb, vstrszv16qi, 0, B_VXE2, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR)
B_DEF (s390_vstrszh, vstrszv8hi, 0, B_VXE2, 0, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR)
B_DEF (s390_vstrszf, vstrszv4si, 0, B_VXE2, 0, BT_FN_UV4SI_UV4SI_UV4SI_UV8HI_INTPTR)
+
+/* arch 14 builtins */
+
+B_DEF (s390_vclfnhs, vclfnhs_v8hi, 0, B_NNPA, O3_U4, BT_FN_V4SF_V8HI_UINT)
+B_DEF (s390_vclfnls, vclfnls_v8hi, 0, B_NNPA, O3_U4, BT_FN_V4SF_V8HI_UINT)
+
+B_DEF (s390_vcrnfs, vcrnfs_v8hi, 0, B_NNPA, O4_U4, BT_FN_V8HI_V4SF_V4SF_UINT)
+
+B_DEF (s390_vcfn, vcfn_v8hi, 0, B_NNPA, O3_U4, BT_FN_V8HI_V8HI_UINT)
+B_DEF (s390_vcnf, vcnf_v8hi, 0, B_NNPA, O3_U4, BT_FN_V8HI_V8HI_UINT)
diff --git a/gcc/config/s390/s390-c.c b/gcc/config/s390/s390-c.c
index a5f5f56..7dbd8bf 100644
--- a/gcc/config/s390/s390-c.c
+++ b/gcc/config/s390/s390-c.c
@@ -339,7 +339,7 @@ s390_cpu_cpp_builtins_internal (cpp_reader *pfile,
s390_def_or_undef_macro (pfile, target_flag_set_p (MASK_OPT_VX), old_opts,
opts, "__VX__", "__VX__");
s390_def_or_undef_macro (pfile, target_flag_set_p (MASK_ZVECTOR), old_opts,
- opts, "__VEC__=10303", "__VEC__");
+ opts, "__VEC__=10304", "__VEC__");
s390_def_or_undef_macro (pfile, target_flag_set_p (MASK_ZVECTOR), old_opts,
opts, "__vector=__attribute__((vector_size(16)))",
"__vector__");
diff --git a/gcc/config/s390/s390-opts.h b/gcc/config/s390/s390-opts.h
index d575180..4141b4d 100644
--- a/gcc/config/s390/s390-opts.h
+++ b/gcc/config/s390/s390-opts.h
@@ -38,6 +38,7 @@ enum processor_type
PROCESSOR_2964_Z13,
PROCESSOR_3906_Z14,
PROCESSOR_8561_Z15,
+ PROCESSOR_ARCH14,
PROCESSOR_NATIVE,
PROCESSOR_max
};
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 9d2cee9..151136b 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -337,6 +337,7 @@ const struct s390_processor processor_table[] =
{ "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost, 11 },
{ "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 },
{ "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 },
+ { "arch14", "arch14", PROCESSOR_ARCH14, &zEC12_cost, 14 },
{ "native", "", PROCESSOR_NATIVE, NULL, 0 }
};
@@ -826,6 +827,12 @@ s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
error ("Builtin %qF requires z15 or higher.", fndecl);
return const0_rtx;
}
+
+ if ((bflags & B_NNPA) && !TARGET_NNPA)
+ {
+ error ("Builtin %qF requires arch14 or higher.", fndecl);
+ return const0_rtx;
+ }
}
if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
&& fcode < S390_ALL_BUILTIN_MAX)
@@ -6562,6 +6569,7 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
{
+ cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);
switch (cond)
{
/* NE a != b -> !(a == b) */
@@ -6600,6 +6608,19 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
}
else
{
+ /* Turn x < 0 into x >> (bits per element - 1) */
+ if (cond == LT && cmp_op2 == CONST0_RTX (mode))
+ {
+ int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1;
+ rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1,
+ GEN_INT (shift), target,
+ 0, OPTAB_DIRECT);
+ if (res != target)
+ emit_move_insn (target, res);
+ return;
+ }
+ cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);
+
switch (cond)
{
/* NE: a != b -> !(a == b) */
@@ -6817,11 +6838,7 @@ s390_expand_vcond (rtx target, rtx then, rtx els,
if (!REG_P (cmp_op1))
cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
- if (!REG_P (cmp_op2))
- cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
-
- s390_expand_vec_compare (result_target, cond,
- cmp_op1, cmp_op2);
+ s390_expand_vec_compare (result_target, cond, cmp_op1, cmp_op2);
/* If the results are supposed to be either -1 or 0 we are done
since this is what our compare instructions generate anyway. */
@@ -8409,6 +8426,7 @@ s390_issue_rate (void)
case PROCESSOR_2827_ZEC12:
case PROCESSOR_2964_Z13:
case PROCESSOR_3906_Z14:
+ case PROCESSOR_ARCH14:
default:
return 1;
}
@@ -14768,6 +14786,7 @@ s390_get_sched_attrmask (rtx_insn *insn)
mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
break;
case PROCESSOR_8561_Z15:
+ case PROCESSOR_ARCH14:
if (get_attr_z15_cracked (insn))
mask |= S390_SCHED_ATTR_MASK_CRACKED;
if (get_attr_z15_expanded (insn))
@@ -14815,6 +14834,7 @@ s390_get_unit_mask (rtx_insn *insn, int *units)
mask |= 1 << 3;
break;
case PROCESSOR_8561_Z15:
+ case PROCESSOR_ARCH14:
*units = 4;
if (get_attr_z15_unit_lsu (insn))
mask |= 1 << 0;
@@ -16688,6 +16708,89 @@ s390_shift_truncation_mask (machine_mode mode)
return mode == DImode || mode == SImode ? 63 : 0;
}
+/* Return TRUE iff CONSTRAINT is an "f" constraint, possibly with additional
+ modifiers. */
+
+static bool
+f_constraint_p (const char *constraint)
+{
+ for (size_t i = 0, c_len = strlen (constraint); i < c_len;
+ i += CONSTRAINT_LEN (constraint[i], constraint + i))
+ {
+ if (constraint[i] == 'f')
+ return true;
+ }
+ return false;
+}
+
+/* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f"
+ constraints when long doubles are stored in vector registers. */
+
+static rtx_insn *
+s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
+ vec<machine_mode> &input_modes,
+ vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
+ HARD_REG_SET & /*clobbered_regs*/)
+{
+ if (!TARGET_VXE)
+ /* Long doubles are stored in FPR pairs - nothing to do. */
+ return NULL;
+
+ rtx_insn *after_md_seq = NULL, *after_md_end = NULL;
+
+ unsigned ninputs = inputs.length ();
+ unsigned noutputs = outputs.length ();
+ for (unsigned i = 0; i < noutputs; i++)
+ {
+ if (GET_MODE (outputs[i]) != TFmode)
+ /* Not a long double - nothing to do. */
+ continue;
+ const char *constraint = constraints[i];
+ bool allows_mem, allows_reg, is_inout;
+ bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs,
+ &allows_mem, &allows_reg, &is_inout);
+ gcc_assert (ok);
+ if (!f_constraint_p (constraint))
+ /* Long double with a constraint other than "=f" - nothing to do. */
+ continue;
+ gcc_assert (allows_reg);
+ gcc_assert (!is_inout);
+ /* Copy output value from a FPR pair into a vector register. */
+ rtx fprx2 = gen_reg_rtx (FPRX2mode);
+ push_to_sequence2 (after_md_seq, after_md_end);
+ emit_insn (gen_fprx2_to_tf (outputs[i], fprx2));
+ after_md_seq = get_insns ();
+ after_md_end = get_last_insn ();
+ end_sequence ();
+ outputs[i] = fprx2;
+ }
+
+ for (unsigned i = 0; i < ninputs; i++)
+ {
+ if (GET_MODE (inputs[i]) != TFmode)
+ /* Not a long double - nothing to do. */
+ continue;
+ const char *constraint = constraints[noutputs + i];
+ bool allows_mem, allows_reg;
+ bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
+ constraints.address (), &allows_mem,
+ &allows_reg);
+ gcc_assert (ok);
+ if (!f_constraint_p (constraint))
+ /* Long double with a constraint other than "f" (or "=f" for inout
+ operands) - nothing to do. */
+ continue;
+ gcc_assert (allows_reg);
+ /* Copy input value from a vector register into a FPR pair. */
+ rtx fprx2 = gen_reg_rtx (FPRX2mode);
+ emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i]));
+ inputs[i] = fprx2;
+ input_modes[i] = FPRX2mode;
+ }
+
+ return after_md_seq;
+}
+
/* Initialize GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -16995,6 +17098,9 @@ s390_shift_truncation_mask (machine_mode mode)
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0xfff
+#undef TARGET_MD_ASM_ADJUST
+#define TARGET_MD_ASM_ADJUST s390_md_asm_adjust
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-s390.h"
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 2da768d..991af96 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -41,7 +41,9 @@ enum processor_flags
PF_Z14 = 2048,
PF_VXE = 4096,
PF_VXE2 = 8192,
- PF_Z15 = 16384
+ PF_Z15 = 16384,
+ PF_NNPA = 32768,
+ PF_ARCH14 = 65536
};
/* This is necessary to avoid a warning about comparing different enum
@@ -108,6 +110,14 @@ enum processor_flags
(s390_arch_flags & PF_VXE2)
#define TARGET_CPU_VXE2_P(opts) \
(opts->x_s390_arch_flags & PF_VXE2)
+#define TARGET_CPU_ARCH14 \
+ (s390_arch_flags & PF_ARCH14)
+#define TARGET_CPU_ARCH14_P(opts) \
+ (opts->x_s390_arch_flags & PF_ARCH14)
+#define TARGET_CPU_NNPA \
+ (s390_arch_flags & PF_NNPA)
+#define TARGET_CPU_NNPA_P(opts) \
+ (opts->x_s390_arch_flags & PF_NNPA)
#define TARGET_HARD_FLOAT_P(opts) (!TARGET_SOFT_FLOAT_P(opts))
@@ -167,6 +177,14 @@ enum processor_flags
(TARGET_VX && TARGET_CPU_VXE2)
#define TARGET_VXE2_P(opts) \
(TARGET_VX_P (opts) && TARGET_CPU_VXE2_P (opts))
+#define TARGET_ARCH14 (TARGET_ZARCH && TARGET_CPU_ARCH14)
+#define TARGET_ARCH14_P(opts) \
+ (TARGET_ZARCH_P (opts->x_target_flags) && TARGET_CPU_ARCH14_P (opts))
+#define TARGET_NNPA \
+ (TARGET_ZARCH && TARGET_CPU_NNPA)
+#define TARGET_NNPA_P(opts) \
+ (TARGET_ZARCH_P (opts) && TARGET_CPU_NNPA_P (opts))
+
#if defined(HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS_ON_Z13)
#define TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS TARGET_Z13
#elif defined(HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS)
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 3f96f5f..c10f25b 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -246,6 +246,13 @@
UNSPEC_VEC_VFMAX
UNSPEC_VEC_ELTSWAP
+
+ UNSPEC_NNPA_VCLFNHS_V8HI
+ UNSPEC_NNPA_VCLFNLS_V8HI
+ UNSPEC_NNPA_VCRNFS_V8HI
+
+ UNSPEC_NNPA_VCFN_V8HI
+ UNSPEC_NNPA_VCNF_V8HI
])
;;
@@ -518,7 +525,7 @@
(const (symbol_ref "s390_tune_attr")))
(define_attr "cpu_facility"
- "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,z15,vxe2"
+ "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,z15,vxe2,arch14,nnpa"
(const_string "standard"))
(define_attr "enabled" ""
@@ -583,7 +590,15 @@
(and (eq_attr "cpu_facility" "vxe2")
(match_test "TARGET_VXE2"))
(const_int 1)
- ]
+
+ (and (eq_attr "cpu_facility" "arch14")
+ (match_test "TARGET_ARCH14"))
+ (const_int 1)
+
+ (and (eq_attr "cpu_facility" "nnpa")
+ (match_test "TARGET_NNPA"))
+ (const_int 1)
+]
(const_int 0)))
;; Whether an instruction supports relative long addressing.
diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt
index de7207e..1027f6a 100644
--- a/gcc/config/s390/s390.opt
+++ b/gcc/config/s390/s390.opt
@@ -116,6 +116,9 @@ EnumValue
Enum(processor_type) String(arch13) Value(PROCESSOR_8561_Z15)
EnumValue
+Enum(processor_type) String(arch14) Value(PROCESSOR_ARCH14)
+
+EnumValue
Enum(processor_type) String(native) Value(PROCESSOR_NATIVE) DriverOnly
mbackchain
diff --git a/gcc/config/s390/vecintrin.h b/gcc/config/s390/vecintrin.h
index cbc8f4d..6bd26f8 100644
--- a/gcc/config/s390/vecintrin.h
+++ b/gcc/config/s390/vecintrin.h
@@ -173,6 +173,12 @@ __lcbb(const void *ptr, int bndry)
#define vec_vsterg vec_vlerh
#define vec_vsterf_flt vec_vlerf_flt
#define vec_vsterg_dbl vec_vlerg_dbl
+
+#define vec_extend_to_fp32_hi __builtin_s390_vclfnhs
+#define vec_extend_to_fp32_lo __builtin_s390_vclfnls
+#define vec_round_from_fp32 __builtin_s390_vcrnfs
+#define vec_convert_to_fp16 __builtin_s390_vcfn
+#define vec_convert_from_fp16 __builtin_s390_vcnf
#define vec_gather_element __builtin_s390_vec_gather_element
#define vec_xl __builtin_s390_vec_xl
#define vec_xld2 __builtin_s390_vec_xld2
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 0e3c31f..c80d582 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -616,12 +616,23 @@
vlvgp\t%v0,%1,%N1"
[(set_attr "op_type" "VRR,VRX,VRX,VRI,VRR")])
-(define_insn "*fprx2_to_tf"
- [(set (match_operand:TF 0 "nonimmediate_operand" "=v")
- (subreg:TF (match_operand:FPRX2 1 "general_operand" "f") 0))]
+(define_insn_and_split "fprx2_to_tf"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=v,AR")
+ (subreg:TF (match_operand:FPRX2 1 "general_operand" "f,f") 0))]
"TARGET_VXE"
- "vmrhg\t%v0,%1,%N1"
- [(set_attr "op_type" "VRR")])
+ "@
+ vmrhg\t%v0,%1,%N1
+ #"
+ "!(MEM_P (operands[0]) && MEM_VOLATILE_P (operands[0]))"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+{
+ operands[2] = simplify_gen_subreg (DFmode, operands[0], TFmode, 0);
+ operands[3] = simplify_gen_subreg (DFmode, operands[1], FPRX2mode, 0);
+ operands[4] = simplify_gen_subreg (DFmode, operands[0], TFmode, 8);
+ operands[5] = simplify_gen_subreg (DFmode, operands[1], FPRX2mode, 8);
+}
+ [(set_attr "op_type" "VRR,*")])
(define_insn "*vec_ti_to_v1ti"
[(set (match_operand:V1TI 0 "nonimmediate_operand" "=v,v,R, v, v,v")
@@ -753,6 +764,21 @@
"vpdi\t%V0,%v1,%V0,5"
[(set_attr "op_type" "VRR")])
+(define_insn_and_split "tf_to_fprx2"
+ [(set (match_operand:FPRX2 0 "nonimmediate_operand" "=f,f")
+ (subreg:FPRX2 (match_operand:TF 1 "general_operand" "v,AR") 0))]
+ "TARGET_VXE"
+ "#"
+ "!(MEM_P (operands[1]) && MEM_VOLATILE_P (operands[1]))"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+{
+ operands[2] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 0);
+ operands[3] = simplify_gen_subreg (DFmode, operands[1], TFmode, 0);
+ operands[4] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 8);
+ operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8);
+})
+
; vec_perm_const for V2DI using vpdi?
;;
@@ -1563,7 +1589,7 @@
[(set (match_operand:<TOINTVEC> 0 "register_operand" "")
(match_operator:<TOINTVEC> 1 "vcond_comparison_operator"
[(match_operand:V_HW 2 "register_operand" "")
- (match_operand:V_HW 3 "register_operand" "")]))]
+ (match_operand:V_HW 3 "nonmemory_operand" "")]))]
"TARGET_VX"
{
s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], operands[3]);
@@ -2454,6 +2480,42 @@
"HAVE_TF (trunctfsf2)"
{ EXPAND_TF (trunctfsf2, 2); })
+(define_expand "trunctf<DFP_ALL:mode>2_vr"
+ [(match_operand:DFP_ALL 0 "nonimmediate_operand" "")
+ (match_operand:TF 1 "nonimmediate_operand" "")]
+ "TARGET_HARD_DFP
+ && GET_MODE_SIZE (TFmode) > GET_MODE_SIZE (<DFP_ALL:MODE>mode)
+ && TARGET_VXE"
+{
+ rtx fprx2 = gen_reg_rtx (FPRX2mode);
+ emit_insn (gen_tf_to_fprx2 (fprx2, operands[1]));
+ emit_insn (gen_truncfprx2<DFP_ALL:mode>2 (operands[0], fprx2));
+ DONE;
+})
+
+(define_expand "trunctf<DFP_ALL:mode>2"
+ [(match_operand:DFP_ALL 0 "nonimmediate_operand" "")
+ (match_operand:TF 1 "nonimmediate_operand" "")]
+ "HAVE_TF (trunctf<DFP_ALL:mode>2)"
+ { EXPAND_TF (trunctf<DFP_ALL:mode>2, 2); })
+
+(define_expand "trunctdtf2_vr"
+ [(match_operand:TF 0 "nonimmediate_operand" "")
+ (match_operand:TD 1 "nonimmediate_operand" "")]
+ "TARGET_HARD_DFP && TARGET_VXE"
+{
+ rtx fprx2 = gen_reg_rtx (FPRX2mode);
+ emit_insn (gen_trunctdfprx22 (fprx2, operands[1]));
+ emit_insn (gen_fprx2_to_tf (operands[0], fprx2));
+ DONE;
+})
+
+(define_expand "trunctdtf2"
+ [(match_operand:TF 0 "nonimmediate_operand" "")
+ (match_operand:TD 1 "nonimmediate_operand" "")]
+ "HAVE_TF (trunctdtf2)"
+ { EXPAND_TF (trunctdtf2, 2); })
+
; load lengthened
(define_insn "extenddftf2_vr"
@@ -2485,6 +2547,42 @@
"HAVE_TF (extendsftf2)"
{ EXPAND_TF (extendsftf2, 2); })
+(define_expand "extend<DFP_ALL:mode>tf2_vr"
+ [(match_operand:TF 0 "nonimmediate_operand" "")
+ (match_operand:DFP_ALL 1 "nonimmediate_operand" "")]
+ "TARGET_HARD_DFP
+ && GET_MODE_SIZE (<DFP_ALL:MODE>mode) < GET_MODE_SIZE (TFmode)
+ && TARGET_VXE"
+{
+ rtx fprx2 = gen_reg_rtx (FPRX2mode);
+ emit_insn (gen_extend<DFP_ALL:mode>fprx22 (fprx2, operands[1]));
+ emit_insn (gen_fprx2_to_tf (operands[0], fprx2));
+ DONE;
+})
+
+(define_expand "extend<DFP_ALL:mode>tf2"
+ [(match_operand:TF 0 "nonimmediate_operand" "")
+ (match_operand:DFP_ALL 1 "nonimmediate_operand" "")]
+ "HAVE_TF (extend<DFP_ALL:mode>tf2)"
+ { EXPAND_TF (extend<DFP_ALL:mode>tf2, 2); })
+
+(define_expand "extendtftd2_vr"
+ [(match_operand:TD 0 "nonimmediate_operand" "")
+ (match_operand:TF 1 "nonimmediate_operand" "")]
+ "TARGET_HARD_DFP && TARGET_VXE"
+{
+ rtx fprx2 = gen_reg_rtx (FPRX2mode);
+ emit_insn (gen_tf_to_fprx2 (fprx2, operands[1]));
+ emit_insn (gen_extendfprx2td2 (operands[0], fprx2));
+ DONE;
+})
+
+(define_expand "extendtftd2"
+ [(match_operand:TD 0 "nonimmediate_operand" "")
+ (match_operand:TF 1 "nonimmediate_operand" "")]
+ "HAVE_TF (extendtftd2)"
+ { EXPAND_TF (extendtftd2, 2); })
+
; test data class
(define_expand "signbittf2_vr"
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 816786f..3df501b 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -2312,3 +2312,58 @@
"TARGET_VXE2 && UINTVAL (operands[2]) < GET_MODE_NUNITS (<V_HW_HSD:MODE>mode)"
"vstebr<bhfgq>\t%v1,%0,%2"
[(set_attr "op_type" "VRX")])
+
+
+;;
+;; NNPA Facility
+;;
+
+(define_insn "vclfnhs_v8hi"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "v")
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
+ (match_operand:QI 2 "const_mask_operand" "C")]
+ UNSPEC_NNPA_VCLFNHS_V8HI))]
+ "TARGET_NNPA"
+ "vclfnh\t%v0,%v1,2,%2"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "vclfnls_v8hi"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "v")
+ (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
+ (match_operand:QI 2 "const_mask_operand" "C")]
+ UNSPEC_NNPA_VCLFNLS_V8HI))]
+ "TARGET_NNPA"
+ "vclfnl\t%v0,%v1,2,%2"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "vcrnfs_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")
+ (match_operand:QI 3 "const_mask_operand" "C")]
+ UNSPEC_NNPA_VCRNFS_V8HI))]
+ "TARGET_NNPA"
+ "vcrnf\t%v0,%v1,%v2,%3,2"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "vcfn_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:QI 2 "const_mask_operand" "C")]
+ UNSPEC_NNPA_VCFN_V8HI))]
+ "TARGET_NNPA"
+ "vcfn\t%v0,%v1,1,%2"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "vcnf_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:QI 2 "const_mask_operand" "C")]
+ UNSPEC_NNPA_VCNF_V8HI))]
+ "TARGET_NNPA"
+ "vcnf\t%v0,%v1,%2,1"
+ [(set_attr "op_type" "VRR")])
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index f355793..f150417 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -13585,23 +13585,18 @@ sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
emit_insn (gen_rtx_SET (operands[0], bshuf));
}
-/* On sparc, any mode which naturally allocates into the float
+/* On the SPARC, any mode which naturally allocates into the single float
registers should return 4 here. */
unsigned int
sparc_regmode_natural_size (machine_mode mode)
{
- int size = UNITS_PER_WORD;
+ const enum mode_class cl = GET_MODE_CLASS (mode);
- if (TARGET_ARCH64)
- {
- enum mode_class mclass = GET_MODE_CLASS (mode);
-
- if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
- size = 4;
- }
+ if ((cl == MODE_FLOAT || cl == MODE_VECTOR_INT) && GET_MODE_SIZE (mode) <= 4)
+ return 4;
- return size;
+ return UNITS_PER_WORD;
}
/* Implement TARGET_HARD_REGNO_NREGS.
diff --git a/gcc/config/sparc/t-sparc b/gcc/config/sparc/t-sparc
index de99ce7..64906e9 100644
--- a/gcc/config/sparc/t-sparc
+++ b/gcc/config/sparc/t-sparc
@@ -27,3 +27,7 @@ sparc-c.o: $(srcdir)/config/sparc/sparc-c.c
sparc-d.o: $(srcdir)/config/sparc/sparc-d.c
$(COMPILE) $<
$(POSTCOMPILE)
+
+# Hack around PR bootstrap/92002.
+tree-ssanames.o-warn += -Wno-error=uninitialized -Wno-error=maybe-uninitialized
+wide-int.o-warn += -Wno-error=uninitialized -Wno-error=maybe-uninitialized
diff --git a/gcc/config/vax/vax.c b/gcc/config/vax/vax.c
index fe4c14e..726c371 100644
--- a/gcc/config/vax/vax.c
+++ b/gcc/config/vax/vax.c
@@ -56,7 +56,7 @@ static int vax_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool vax_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static machine_mode vax_cc_modes_compatible (machine_mode, machine_mode);
static rtx_insn *vax_md_asm_adjust (vec<rtx> &, vec<rtx> &,
- vec<const char *> &,
+ vec<machine_mode> &, vec<const char *> &,
vec<rtx> &, HARD_REG_SET &);
static rtx vax_function_arg (cumulative_args_t, const function_arg_info &);
static void vax_function_arg_advance (cumulative_args_t,
@@ -1174,6 +1174,7 @@ vax_cc_modes_compatible (machine_mode m1, machine_mode m2)
static rtx_insn *
vax_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED,
vec<rtx> &inputs ATTRIBUTE_UNUSED,
+ vec<machine_mode> &input_modes ATTRIBUTE_UNUSED,
vec<const char *> &constraints ATTRIBUTE_UNUSED,
vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
{
diff --git a/gcc/config/visium/visium.c b/gcc/config/visium/visium.c
index e0b88be..7eb2248 100644
--- a/gcc/config/visium/visium.c
+++ b/gcc/config/visium/visium.c
@@ -188,8 +188,9 @@ static bool visium_frame_pointer_required (void);
static tree visium_build_builtin_va_list (void);
static rtx_insn *visium_md_asm_adjust (vec<rtx> &, vec<rtx> &,
- vec<const char *> &,
- vec<rtx> &, HARD_REG_SET &);
+ vec<machine_mode> &,
+ vec<const char *> &, vec<rtx> &,
+ HARD_REG_SET &);
static bool visium_legitimate_constant_p (machine_mode, rtx);
@@ -791,9 +792,10 @@ visium_conditional_register_usage (void)
the original cc0-based compiler. */
static rtx_insn *
-visium_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
- vec<const char *> &/*constraints*/,
- vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
+visium_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
+ vec<machine_mode> & /*input_modes*/,
+ vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
+ HARD_REG_SET &clobbered_regs)
{
clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REGNUM));
SET_HARD_REG_BIT (clobbered_regs, FLAGS_REGNUM);