aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Michael Meissner <meissner@linux.vnet.ibm.com> 2009-06-25 23:07:07 +0000
committer: Michael Meissner <meissner@gcc.gnu.org> 2009-06-25 23:07:07 +0000
commit: 8beb65e326baa7a434227ab21c1062b09ee34bdb (patch)
tree: e9b80a98369f5a1302118a83e8fd20dce190e56d
parent: c8aca64f5b1cc43734c6c31945638a65030a3801 (diff)
download: gcc-8beb65e326baa7a434227ab21c1062b09ee34bdb.zip
gcc-8beb65e326baa7a434227ab21c1062b09ee34bdb.tar.gz
gcc-8beb65e326baa7a434227ab21c1062b09ee34bdb.tar.bz2
power7 patch #2: add bswap64; add preliminary VSX register support; add more -mdebug=* support
Co-Authored-By: Pat Haugen <pthaugen@us.ibm.com>
Co-Authored-By: Revital Eres <eres@il.ibm.com>
From-SVN: r148955
-rw-r--r-- gcc/ChangeLog 66
-rw-r--r-- gcc/config/rs6000/rs6000-protos.h 21
-rw-r--r-- gcc/config/rs6000/rs6000.c 881
-rw-r--r-- gcc/config/rs6000/rs6000.h 45
-rw-r--r-- gcc/config/rs6000/rs6000.md 381
5 files changed, 1264 insertions, 130 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index eef7507..0823094 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,69 @@
+2009-06-25 Michael Meissner <meissner@linux.vnet.ibm.com>
+ Pat Haugen <pthaugen@us.ibm.com>
+ Revital Eres <ERES@il.ibm.com>
+
+ * config/rs6000/rs6000-protos.h (rs6000_secondary_reload_class):
+ Change some of the functions called by macros to being called
+ through a pointer, so debug functions can be inserted if
+ -mdebug=addr or -mdebug=cost.
+ (rs6000_preferred_reload_class_ptr): Ditto.
+ (rs6000_secondary_reload_class_ptr): Ditto.
+ (rs6000_secondary_memory_needed_ptr): Ditto.
+ (rs6000_cannot_change_mode_class_ptr): Ditto.
+ (rs6000_secondary_reload_inner): Ditto.
+ (rs6000_legitimize_reload_address): Ditto.
+ (rs6000_legitimize_reload_address_ptr): Ditto.
+ (rs6000_mode_dependent_address): Ditto.
+ (rs6000_mode_dependent_address_ptr): Ditto.
+
+ * config/rs6000/rs6000.c (reg_offset_addressing_ok_p): New
+ function to return true if the mode allows reg + integer
+ addresses.
+ (virtual_stack_registers_memory_p): New function to return true if
+ the address refers to a virtual stack register.
+ (rs6000_legitimate_offset_address_p): Move code to say whether a
+ mode supports reg+int addressing to reg_offset_addressing_ok_p and
+ call it.
+ (rs6000_legitimate_address_p): Add checks for modes that only can
+ do reg+reg addressing. Start adding VSX support.
+ (rs6000_legitimize_reload_address): Ditto.
+ (rs6000_legitimize_address): Ditto.
+ (rs6000_debug_legitimate_address_p): New debug functions for
+ -mdebug=addr and -mdebug=cost.
+ (rs6000_debug_rtx_costs): Ditto.
+ (rs6000_debug_address_cost): Ditto.
+ (rs6000_debug_adjust_cost): Ditto.
+ (rs6000_debug_legitimize_address): Ditto.
+ (rs6000_legitimize_reload_address_ptr): Point to call normal
+ function or debug function. Make functions called via pointer
+ static.
+ (rs6000_mode_dependent_address_ptr): Ditto.
+ (rs6000_secondary_reload_class_ptr): Ditto.
+ (rs6000_hard_regno_mode_ok): Add preliminary VSX support.
+ (rs6000_emit_move): Add -mdebug=addr support. Change an abort
+ into a friendlier error.
+ (rs6000_init_builtins): Add initial VSX support.
+ (rs6000_adjust_cost): Fix some spacing issues.
+
+ * config/rs6000/rs6000.h (enum reg_class): Add VSX_REGS.
+ (REG_CLASS_NAMES): Ditto.
+ (REG_CLASS_CONTENTS): Ditto.
+ (PREFERRED_RELOAD_CLASS): Move from a macro to calling through a
+ pointer, to add -mdebug=addr support.
+ (CANNOT_CHANGE_MODE_CLASS): Ditto.
+ (SECONDARY_RELOAD_CLASS): Call through a pointer to add
+ -mdebug=addr support.
+ (LEGITIMIZE_RELOAD_ADDRESS): Ditto.
+ (GO_IF_MODE_DEPENDENT_ADDRESS): Ditto.
+ (enum rs6000_builtins): Add RS6000_BUILTIN_BSWAP_HI.
+
+ * config/rs6000/rs6000.md (bswaphi*): Add support for swapping
+ 16-bit values.
+ (bswapsi*): Set attribute types for load/store. Add combiner
+ patterns to eliminate zero extend on 64-bit.
+ (bswapdi*): Add support for swapping 64-bit values. Use ldbrx and
+ stdbrx if the hardware supports those instructions.
+
2009-06-25 Ian Lance Taylor <iant@google.com>
* doc/invoke.texi (Option Summary): Mention -static-libstdc++.
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 731349e..96f215b 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -64,9 +64,18 @@ extern int insvdi_rshift_rlwimi_p (rtx, rtx, rtx);
extern int registers_ok_for_quad_peep (rtx, rtx);
extern int mems_ok_for_quad_peep (rtx, rtx);
extern bool gpr_or_gpr_p (rtx, rtx);
-extern enum reg_class rs6000_secondary_reload_class (enum reg_class,
- enum machine_mode, rtx);
-
+extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
+ enum reg_class);
+extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
+ enum machine_mode,
+ rtx);
+extern bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class,
+ enum reg_class,
+ enum machine_mode);
+extern bool (*rs6000_cannot_change_mode_class_ptr) (enum machine_mode,
+ enum machine_mode,
+ enum reg_class);
+extern void rs6000_secondary_reload_inner (rtx, rtx, rtx, bool);
extern int paired_emit_vector_cond_expr (rtx, rtx, rtx,
rtx, rtx, rtx);
extern void paired_expand_vector_move (rtx operands[]);
@@ -107,10 +116,10 @@ extern rtx create_TOC_reference (rtx);
extern void rs6000_split_multireg_move (rtx, rtx);
extern void rs6000_emit_move (rtx, rtx, enum machine_mode);
extern rtx rs6000_secondary_memory_needed_rtx (enum machine_mode);
-extern rtx rs6000_legitimize_reload_address (rtx, enum machine_mode,
- int, int, int, int *);
+extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode,
+ int, int, int, int *);
extern bool rs6000_legitimate_offset_address_p (enum machine_mode, rtx, int);
-extern bool rs6000_mode_dependent_address (rtx);
+extern bool (*rs6000_mode_dependent_address_ptr) (rtx);
extern rtx rs6000_find_base_term (rtx);
extern bool rs6000_offsettable_memref_p (rtx);
extern rtx rs6000_return_addr (int, rtx);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 0263f91..cf8f53f 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -792,6 +792,7 @@ struct processor_costs power7_cost = {
static bool rs6000_function_ok_for_sibcall (tree, tree);
static const char *rs6000_invalid_within_doloop (const_rtx);
static bool rs6000_legitimate_address_p (enum machine_mode, rtx, bool);
+static bool rs6000_debug_legitimate_address_p (enum machine_mode, rtx, bool);
static rtx rs6000_generate_compare (rtx, enum machine_mode);
static void rs6000_emit_stack_tie (void);
static void rs6000_frame_related (rtx, rtx, HOST_WIDE_INT, rtx, rtx);
@@ -803,6 +804,8 @@ static void rs6000_emit_allocate_stack (HOST_WIDE_INT, int, int);
static unsigned rs6000_hash_constant (rtx);
static unsigned toc_hash_function (const void *);
static int toc_hash_eq (const void *, const void *);
+static bool reg_offset_addressing_ok_p (enum machine_mode);
+static bool virtual_stack_registers_memory_p (rtx);
static bool constant_pool_expr_p (rtx);
static bool legitimate_small_data_p (enum machine_mode, rtx);
static bool legitimate_lo_sum_address_p (enum machine_mode, rtx, int);
@@ -866,7 +869,10 @@ static void rs6000_xcoff_file_end (void);
#endif
static int rs6000_variable_issue (FILE *, int, rtx, int);
static bool rs6000_rtx_costs (rtx, int, int, int *, bool);
+static bool rs6000_debug_rtx_costs (rtx, int, int, int *, bool);
+static int rs6000_debug_address_cost (rtx, bool);
static int rs6000_adjust_cost (rtx, rtx, rtx, int);
+static int rs6000_debug_adjust_cost (rtx, rtx, rtx, int);
static void rs6000_sched_init (FILE *, int, int);
static bool is_microcoded_insn (rtx);
static bool is_nonpipeline_insn (rtx);
@@ -957,6 +963,7 @@ int easy_vector_constant (rtx, enum machine_mode);
static rtx rs6000_dwarf_register_span (rtx);
static void rs6000_init_dwarf_reg_sizes_extra (tree);
static rtx rs6000_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx rs6000_debug_legitimize_address (rtx, rtx, enum machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static rtx rs6000_tls_get_addr (void);
@@ -1009,6 +1016,59 @@ static int get_vsel_insn (enum machine_mode);
static void rs6000_emit_vector_select (rtx, rtx, rtx, rtx);
static tree rs6000_stack_protect_fail (void);
+static rtx rs6000_legitimize_reload_address (rtx, enum machine_mode, int, int,
+ int, int *);
+
+static rtx rs6000_debug_legitimize_reload_address (rtx, enum machine_mode, int,
+ int, int, int *);
+
+rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode, int, int,
+ int, int *)
+ = rs6000_legitimize_reload_address;
+
+static bool rs6000_mode_dependent_address (rtx);
+static bool rs6000_debug_mode_dependent_address (rtx);
+bool (*rs6000_mode_dependent_address_ptr) (rtx)
+ = rs6000_mode_dependent_address;
+
+static enum reg_class rs6000_secondary_reload_class (enum reg_class,
+ enum machine_mode, rtx);
+static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
+ enum machine_mode,
+ rtx);
+enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
+ enum machine_mode, rtx)
+ = rs6000_secondary_reload_class;
+
+static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
+static enum reg_class rs6000_debug_preferred_reload_class (rtx,
+ enum reg_class);
+enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
+ = rs6000_preferred_reload_class;
+
+static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
+ enum machine_mode);
+
+static bool rs6000_debug_secondary_memory_needed (enum reg_class,
+ enum reg_class,
+ enum machine_mode);
+
+bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
+ enum machine_mode)
+ = rs6000_secondary_memory_needed;
+
+static bool rs6000_cannot_change_mode_class (enum machine_mode,
+ enum machine_mode,
+ enum reg_class);
+static bool rs6000_debug_cannot_change_mode_class (enum machine_mode,
+ enum machine_mode,
+ enum reg_class);
+
+bool (*rs6000_cannot_change_mode_class_ptr) (enum machine_mode,
+ enum machine_mode,
+ enum reg_class)
+ = rs6000_cannot_change_mode_class;
+
const int INSN_NOT_AVAILABLE = -1;
static enum machine_mode rs6000_eh_return_filter_mode (void);
@@ -1431,7 +1491,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
/* AltiVec only in AldyVec registers. */
if (ALTIVEC_REGNO_P (regno))
- return ALTIVEC_VECTOR_MODE (mode);
+ return VECTOR_MEM_ALTIVEC_OR_VSX_P (mode);
/* ...but GPRs can hold SIMD data on the SPE in one register. */
if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
@@ -2057,6 +2117,34 @@ rs6000_override_options (const char *default_cpu)
rs6000_debug_cost = 1;
else
error ("unknown -mdebug-%s switch", rs6000_debug_name);
+
+ /* If the appropriate debug option is enabled, replace the target hooks
+ with debug versions that call the real version and then print
+ debugging information. */
+ if (TARGET_DEBUG_COST)
+ {
+ targetm.rtx_costs = rs6000_debug_rtx_costs;
+ targetm.address_cost = rs6000_debug_address_cost;
+ targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
+ targetm.legitimize_address = rs6000_debug_legitimize_address;
+ rs6000_secondary_reload_class_ptr
+ = rs6000_debug_secondary_reload_class;
+ rs6000_secondary_memory_needed_ptr
+ = rs6000_debug_secondary_memory_needed;
+ rs6000_cannot_change_mode_class_ptr
+ = rs6000_debug_cannot_change_mode_class;
+ rs6000_preferred_reload_class_ptr
+ = rs6000_debug_preferred_reload_class;
+ rs6000_legitimize_reload_address_ptr
+ = rs6000_debug_legitimize_reload_address;
+ rs6000_mode_dependent_address_ptr
+ = rs6000_debug_mode_dependent_address;
+ }
}
if (rs6000_traceback_name)
@@ -3978,6 +4066,58 @@ gpr_or_gpr_p (rtx op0, rtx op1)
/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
static bool
+reg_offset_addressing_ok_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SFmode:
+ case V4SImode:
+ case V2DFmode:
+ case V2DImode:
+ /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. */
+ if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
+ return false;
+ break;
+
+ case V4HImode:
+ case V2SImode:
+ case V1DImode:
+ case V2SFmode:
+ /* Paired vector modes. Only reg+reg addressing is valid. */
+ if (TARGET_PAIRED_FLOAT)
+ return false;
+ break;
+
+ default:
+ break;
+ }
+
+ return true;
+}
+
+static bool
+virtual_stack_registers_memory_p (rtx op)
+{
+ int regnum;
+
+ if (GET_CODE (op) == REG)
+ regnum = REGNO (op);
+
+ else if (GET_CODE (op) == PLUS
+ && GET_CODE (XEXP (op, 0)) == REG
+ && GET_CODE (XEXP (op, 1)) == CONST_INT)
+ regnum = REGNO (XEXP (op, 0));
+
+ else
+ return false;
+
+ return (regnum >= FIRST_VIRTUAL_REGISTER
+ && regnum <= LAST_VIRTUAL_REGISTER);
+}
+
+static bool
constant_pool_expr_p (rtx op)
{
rtx base, offset;
@@ -4034,6 +4174,8 @@ rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x, int strict)
return false;
if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
return false;
+ if (!reg_offset_addressing_ok_p (mode))
+ return virtual_stack_registers_memory_p (x);
if (legitimate_constant_pool_address_p (x))
return true;
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
@@ -4043,22 +4185,10 @@ rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x, int strict)
extra = 0;
switch (mode)
{
- case V16QImode:
- case V8HImode:
- case V4SFmode:
- case V4SImode:
- /* AltiVec vector modes. Only reg+reg addressing is valid and
- constant offset zero should not occur due to canonicalization. */
- return false;
-
case V4HImode:
case V2SImode:
case V1DImode:
case V2SFmode:
- /* Paired vector modes. Only reg+reg addressing is valid and
- constant offset zero should not occur due to canonicalization. */
- if (TARGET_PAIRED_FLOAT)
- return false;
/* SPE vector modes. */
return SPE_CONST_OFFSET_OK (offset);
@@ -4066,6 +4196,11 @@ rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x, int strict)
if (TARGET_E500_DOUBLE)
return SPE_CONST_OFFSET_OK (offset);
+ /* If we are using VSX scalar loads, restrict ourselves to reg+reg
+ addressing. */
+ if (VECTOR_MEM_VSX_P (DFmode))
+ return false;
+
case DDmode:
case DImode:
/* On e500v2, we may have:
@@ -4136,7 +4271,7 @@ avoiding_indexed_address_p (enum machine_mode mode)
{
/* Avoid indexed addressing for modes that have non-indexed
load/store instruction forms. */
- return TARGET_AVOID_XFORM && !ALTIVEC_VECTOR_MODE (mode);
+ return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
}
inline bool
@@ -4222,17 +4357,34 @@ legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict)
Then check for the sum of a register and something not constant, try to
load the other things into a register and return the sum. */
-rtx
+static rtx
rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
enum machine_mode mode)
{
+ if (!reg_offset_addressing_ok_p (mode))
+ {
+ if (virtual_stack_registers_memory_p (x))
+ return x;
+
+ /* In theory we should not be seeing addresses of the form reg+0,
+ but just in case it is generated, optimize it away. */
+ if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
+ return force_reg (Pmode, XEXP (x, 0));
+
+ /* Make sure both operands are registers. */
+ else if (GET_CODE (x) == PLUS)
+ return gen_rtx_PLUS (Pmode,
+ force_reg (Pmode, XEXP (x, 0)),
+ force_reg (Pmode, XEXP (x, 1)));
+ else
+ return force_reg (Pmode, x);
+ }
if (GET_CODE (x) == SYMBOL_REF)
{
enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
if (model != 0)
return rs6000_legitimize_tls_address (x, model);
}
-
if (GET_CODE (x) == PLUS
&& GET_CODE (XEXP (x, 0)) == REG
&& GET_CODE (XEXP (x, 1)) == CONST_INT
@@ -4241,7 +4393,6 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
&& (mode == DImode || mode == TImode)
&& (INTVAL (XEXP (x, 1)) & 3) != 0)
|| SPE_VECTOR_MODE (mode)
- || ALTIVEC_VECTOR_MODE (mode)
|| (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
|| mode == DImode || mode == DDmode
|| mode == TDmode))))
@@ -4271,18 +4422,6 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
return gen_rtx_PLUS (Pmode, XEXP (x, 0),
force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
}
- else if (ALTIVEC_VECTOR_MODE (mode))
- {
- rtx reg;
-
- /* Make sure both operands are registers. */
- if (GET_CODE (x) == PLUS)
- return gen_rtx_PLUS (Pmode, force_reg (Pmode, XEXP (x, 0)),
- force_reg (Pmode, XEXP (x, 1)));
-
- reg = force_reg (Pmode, x);
- return reg;
- }
else if (SPE_VECTOR_MODE (mode)
|| (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
|| mode == DDmode || mode == TDmode
@@ -4364,6 +4503,56 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
return x;
}
+/* Debug version of rs6000_legitimize_address. */
+static rtx
+rs6000_debug_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
+{
+ rtx ret;
+ rtx insns;
+
+ start_sequence ();
+ ret = rs6000_legitimize_address (x, oldx, mode);
+ insns = get_insns ();
+ end_sequence ();
+
+ if (ret != x)
+ {
+ fprintf (stderr,
+ "\nrs6000_legitimize_address: mode %s, old code %s, "
+ "new code %s, modified\n",
+ GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
+ GET_RTX_NAME (GET_CODE (ret)));
+
+ fprintf (stderr, "Original address:\n");
+ debug_rtx (x);
+
+ fprintf (stderr, "oldx:\n");
+ debug_rtx (oldx);
+
+ fprintf (stderr, "New address:\n");
+ debug_rtx (ret);
+
+ if (insns)
+ {
+ fprintf (stderr, "Insns added:\n");
+ debug_rtx_list (insns, 20);
+ }
+ }
+ else
+ {
+ fprintf (stderr,
+ "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
+ GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
+
+ debug_rtx (x);
+ }
+
+ if (insns)
+ emit_insn (insns);
+
+ return ret;
+}
+
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
We need to emit DTP-relative relocations. */
@@ -4638,11 +4827,13 @@ rs6000_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
A movsf_low is generated so we wind up with 2 instructions rather than 3.
The Darwin code is inside #if TARGET_MACHO because only then are the
machopic_* functions defined. */
-rtx
+static rtx
rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
int opnum, int type,
int ind_levels ATTRIBUTE_UNUSED, int *win)
{
+ bool reg_offset_p = reg_offset_addressing_ok_p (mode);
+
/* We must recognize output that we have already generated ourselves. */
if (GET_CODE (x) == PLUS
&& GET_CODE (XEXP (x, 0)) == PLUS
@@ -4683,8 +4874,9 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
&& REGNO (XEXP (x, 0)) < 32
&& INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
&& GET_CODE (XEXP (x, 1)) == CONST_INT
+ && reg_offset_p
&& (INTVAL (XEXP (x, 1)) & 3) != 0
- && !ALTIVEC_VECTOR_MODE (mode)
+ && VECTOR_MEM_NONE_P (mode)
&& GET_MODE_SIZE (mode) >= UNITS_PER_WORD
&& TARGET_POWERPC64)
{
@@ -4701,11 +4893,12 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
&& REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
&& INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
&& GET_CODE (XEXP (x, 1)) == CONST_INT
+ && reg_offset_p
&& !SPE_VECTOR_MODE (mode)
&& !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
|| mode == DDmode || mode == TDmode
|| mode == DImode))
- && !ALTIVEC_VECTOR_MODE (mode))
+ && VECTOR_MEM_NONE_P (mode))
{
HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
@@ -4735,7 +4928,8 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
}
if (GET_CODE (x) == SYMBOL_REF
- && !ALTIVEC_VECTOR_MODE (mode)
+ && reg_offset_p
+ && VECTOR_MEM_NONE_P (mode)
&& !SPE_VECTOR_MODE (mode)
#if TARGET_MACHO
&& DEFAULT_ABI == ABI_DARWIN
@@ -4775,9 +4969,11 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
/* Reload an offset address wrapped by an AND that represents the
masking of the lower bits. Strip the outer AND and let reload
- convert the offset address into an indirect address. */
- if (TARGET_ALTIVEC
- && ALTIVEC_VECTOR_MODE (mode)
+ convert the offset address into an indirect address. For VSX,
+ force reload to create the address with an AND in a separate
+ register, because we can't guarantee an altivec register will
+ be used. */
+ if (VECTOR_MEM_ALTIVEC_P (mode)
&& GET_CODE (x) == AND
&& GET_CODE (XEXP (x, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
@@ -4791,6 +4987,7 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
}
if (TARGET_TOC
+ && reg_offset_p
&& GET_CODE (x) == SYMBOL_REF
&& constant_pool_expr_p (x)
&& ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), mode))
@@ -4803,6 +5000,33 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
return x;
}
+/* Debug version of rs6000_legitimize_reload_address. */
+static rtx
+rs6000_debug_legitimize_reload_address (rtx x, enum machine_mode mode,
+ int opnum, int type,
+ int ind_levels, int *win)
+{
+ rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
+ ind_levels, win);
+ fprintf (stderr,
+ "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
+ "type = %d, ind_levels = %d, win = %d, original addr:\n",
+ GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
+ debug_rtx (x);
+
+ if (x == ret)
+ fprintf (stderr, "Same address returned\n");
+ else if (!ret)
+ fprintf (stderr, "NULL returned\n");
+ else
+ {
+ fprintf (stderr, "New address:\n");
+ debug_rtx (ret);
+ }
+
+ return ret;
+}
+
/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
that is a valid memory address for an instruction.
The MODE argument is the machine mode for the MEM expression
@@ -4823,9 +5047,10 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
bool
rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
{
+ bool reg_offset_p = reg_offset_addressing_ok_p (mode);
+
/* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
- if (TARGET_ALTIVEC
- && ALTIVEC_VECTOR_MODE (mode)
+ if (VECTOR_MEM_ALTIVEC_P (mode)
&& GET_CODE (x) == AND
&& GET_CODE (XEXP (x, 1)) == CONST_INT
&& INTVAL (XEXP (x, 1)) == -16)
@@ -4836,7 +5061,7 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
if (legitimate_indirect_address_p (x, reg_ok_strict))
return 1;
if ((GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
- && !ALTIVEC_VECTOR_MODE (mode)
+ && !VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
&& !SPE_VECTOR_MODE (mode)
&& mode != TFmode
&& mode != TDmode
@@ -4846,12 +5071,15 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
&& TARGET_UPDATE
&& legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
return 1;
- if (legitimate_small_data_p (mode, x))
+ if (virtual_stack_registers_memory_p (x))
return 1;
- if (legitimate_constant_pool_address_p (x))
+ if (reg_offset_p && legitimate_small_data_p (mode, x))
+ return 1;
+ if (reg_offset_p && legitimate_constant_pool_address_p (x))
return 1;
/* If not REG_OK_STRICT (before reload) let pass any stack offset. */
if (! reg_ok_strict
+ && reg_offset_p
&& GET_CODE (x) == PLUS
&& GET_CODE (XEXP (x, 0)) == REG
&& (XEXP (x, 0) == virtual_stack_vars_rtx
@@ -4872,6 +5100,12 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
&& legitimate_indexed_address_p (x, reg_ok_strict))
return 1;
if (GET_CODE (x) == PRE_MODIFY
+ && VECTOR_MEM_VSX_P (mode)
+ && TARGET_UPDATE
+ && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)
+ && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
+ return 1;
+ if (GET_CODE (x) == PRE_MODIFY
&& mode != TImode
&& mode != TFmode
&& mode != TDmode
@@ -4879,7 +5113,7 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
|| TARGET_POWERPC64
|| ((mode != DFmode && mode != DDmode) || TARGET_E500_DOUBLE))
&& (TARGET_POWERPC64 || mode != DImode)
- && !ALTIVEC_VECTOR_MODE (mode)
+ && !VECTOR_MEM_ALTIVEC_P (mode)
&& !SPE_VECTOR_MODE (mode)
/* Restrict addressing for DI because of our SUBREG hackery. */
&& !(TARGET_E500_DOUBLE
@@ -4891,23 +5125,41 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
&& legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
&& rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
return 1;
- if (legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
+ if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
return 1;
return 0;
}
+/* Debug version of rs6000_legitimate_address_p. */
+static bool
+rs6000_debug_legitimate_address_p (enum machine_mode mode, rtx x,
+ bool reg_ok_strict)
+{
+ bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
+ fprintf (stderr,
+ "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
+ "strict = %d, code = %s\n",
+ ret ? "true" : "false",
+ GET_MODE_NAME (mode),
+ reg_ok_strict,
+ GET_RTX_NAME (GET_CODE (x)));
+ debug_rtx (x);
+
+ return ret;
+}
+
/* Go to LABEL if ADDR (a legitimate address expression)
has an effect that depends on the machine mode it is used for.
On the RS/6000 this is true of all integral offsets (since AltiVec
- modes don't allow them) or is a pre-increment or decrement.
+ and VSX modes don't allow them) or is a pre-increment or decrement.
??? Except that due to conceptual problems in offsettable_address_p
we can't really report the problems of integral offsets. So leave
this assuming that the adjustable offset must be valid for the
sub-words of a TFmode operand, which is what we had before. */
-bool
+static bool
rs6000_mode_dependent_address (rtx addr)
{
switch (GET_CODE (addr))
@@ -4934,6 +5186,19 @@ rs6000_mode_dependent_address (rtx addr)
return false;
}
+/* Debug version of rs6000_mode_dependent_address. */
+static bool
+rs6000_debug_mode_dependent_address (rtx addr)
+{
+ bool ret = rs6000_mode_dependent_address (addr);
+
+ fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
+ ret ? "true" : "false");
+ debug_rtx (addr);
+
+ return ret;
+}
+
/* Implement FIND_BASE_TERM. */
rtx
@@ -5271,6 +5536,20 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
operands[0] = dest;
operands[1] = source;
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr,
+ "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
+ "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
+ GET_MODE_NAME (mode),
+ reload_in_progress,
+ reload_completed,
+ can_create_pseudo_p ());
+ debug_rtx (dest);
+ fprintf (stderr, "source:\n");
+ debug_rtx (source);
+ }
+
/* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
if (GET_CODE (operands[1]) == CONST_DOUBLE
&& ! FLOAT_MODE_P (mode)
@@ -5651,7 +5930,7 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
break;
default:
- gcc_unreachable ();
+ fatal_insn ("bad move", gen_rtx_SET (VOIDmode, dest, source));
}
/* Above, we may have called force_const_mem which may have returned
@@ -9632,6 +9911,9 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if (fcode == RS6000_BUILTIN_RSQRTF)
return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
+ if (fcode == RS6000_BUILTIN_BSWAP_HI)
+ return rs6000_expand_unop_builtin (CODE_FOR_bswaphi2, exp, target);
+
if (fcode == ALTIVEC_BUILTIN_MASK_FOR_LOAD
|| fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
{
@@ -9861,7 +10143,7 @@ rs6000_init_builtins (void)
spe_init_builtins ();
if (TARGET_ALTIVEC)
altivec_init_builtins ();
- if (TARGET_ALTIVEC || TARGET_SPE || TARGET_PAIRED_FLOAT)
+ if (TARGET_ALTIVEC || TARGET_SPE || TARGET_PAIRED_FLOAT || TARGET_VSX)
rs6000_common_init_builtins ();
if (TARGET_PPC_GFXOPT)
{
@@ -9888,6 +10170,14 @@ rs6000_init_builtins (void)
RS6000_BUILTIN_RECIP);
}
+ if (TARGET_POWERPC)
+ {
+ tree ftype = build_function_type_list (unsigned_intHI_type_node,
+ unsigned_intHI_type_node,
+ NULL_TREE);
+ def_builtin (MASK_POWERPC, "__builtin_bswap16", ftype,
+ RS6000_BUILTIN_BSWAP_HI);
+ }
#if TARGET_XCOFF
/* AIX libm provides clog as __clog. */
@@ -11897,8 +12187,10 @@ rtx
rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
{
static bool eliminated = false;
+ rtx ret;
+
if (mode != SDmode)
- return assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
+ ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
else
{
rtx mem = cfun->machine->sdmode_stack_slot;
@@ -11910,8 +12202,20 @@ rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
cfun->machine->sdmode_stack_slot = mem;
eliminated = true;
}
- return mem;
+ ret = mem;
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
+ GET_MODE_NAME (mode));
+ if (!ret)
+ fprintf (stderr, "\tNULL_RTX\n");
+ else
+ debug_rtx (ret);
}
+
+ return ret;
}
static tree
@@ -11946,7 +12250,6 @@ rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
return NULL_TREE;
}
-
/* Allocate a 64-bit stack slot to be used for copying SDmode
values through if this function has any SDmode references. */
@@ -11996,13 +12299,163 @@ rs6000_instantiate_decls (void)
instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
}
+/* Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS; but on some machines
+ in some cases it is preferable to use a more restrictive class.
+
+ On the RS/6000, we have to return NO_REGS when we want to reload a
+ floating-point CONST_DOUBLE to force it to be copied to memory.
+
+ We also don't want to reload integer values into floating-point
+ registers if we can at all help it. In fact, this can
+ cause reload to die, if it tries to generate a reload of CTR
+ into a FP register and discovers it doesn't have the memory location
+ required.
+
+ ??? Would it be a good idea to have reload do the converse, that is
+ try to reload floating modes into FP registers if possible?
+ */
+
+static enum reg_class
+rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ if (VECTOR_UNIT_VSX_P (mode)
+ && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
+ return rclass;
+
+ if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
+ && (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
+ && easy_vector_constant (x, mode))
+ return ALTIVEC_REGS;
+
+ if (CONSTANT_P (x) && reg_classes_intersect_p (rclass, FLOAT_REGS))
+ return NO_REGS;
+
+ if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
+ return GENERAL_REGS;
+
+ /* For VSX, prefer the traditional registers for DF if the address is of the
+ form reg+offset because we can use the non-VSX loads. Prefer the Altivec
+ registers if Altivec is handling the vector operations (i.e. V16QI, V8HI,
+ and V4SI). */
+ if (rclass == VSX_REGS && VECTOR_MEM_VSX_P (mode))
+ {
+ if (mode == DFmode && GET_CODE (x) == MEM)
+ {
+ rtx addr = XEXP (x, 0);
+
+ if (legitimate_indirect_address_p (addr, false)) /* reg */
+ return VSX_REGS;
+
+ if (legitimate_indexed_address_p (addr, false)) /* reg+reg */
+ return VSX_REGS;
+
+ if (GET_CODE (addr) == PRE_MODIFY
+ && legitimate_indexed_address_p (XEXP (addr, 0), false))
+ return VSX_REGS;
+
+ return FLOAT_REGS;
+ }
+
+ if (VECTOR_UNIT_ALTIVEC_P (mode))
+ return ALTIVEC_REGS;
+
+ return rclass;
+ }
+
+ return rclass;
+}
+
+/* Debug version of rs6000_preferred_reload_class. */
+static enum reg_class
+rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
+{
+ enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
+
+ fprintf (stderr,
+ "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
+ "mode = %s, x:\n",
+ reg_class_names[ret], reg_class_names[rclass],
+ GET_MODE_NAME (GET_MODE (x)));
+ debug_rtx (x);
+
+ return ret;
+}
+
+/* If we are copying between FP or AltiVec registers and anything else, we need
+ a memory location. The exception is when we are targeting ppc64 and the
+ move to/from fpr to gpr instructions are available. Also, under VSX, you
+ can copy vector registers from the FP register set to the Altivec register
+ set and vice versa. */
+
+static bool
+rs6000_secondary_memory_needed (enum reg_class class1,
+ enum reg_class class2,
+ enum machine_mode mode)
+{
+ if (class1 == class2)
+ return false;
+
+ /* Under VSX, there are 3 register classes that values could be in (VSX_REGS,
+ ALTIVEC_REGS, and FLOAT_REGS). We don't need to use memory to copy
+ between these classes. But we need memory for other things that can go in
+ FLOAT_REGS like SFmode. */
+ if (TARGET_VSX
+ && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode))
+ && (class1 == VSX_REGS || class1 == ALTIVEC_REGS
+ || class1 == FLOAT_REGS))
+ return (class2 != VSX_REGS && class2 != ALTIVEC_REGS
+ && class2 != FLOAT_REGS);
+
+ if (class1 == VSX_REGS || class2 == VSX_REGS)
+ return true;
+
+ if (class1 == FLOAT_REGS
+ && (!TARGET_MFPGPR || !TARGET_POWERPC64
+ || ((mode != DFmode)
+ && (mode != DDmode)
+ && (mode != DImode))))
+ return true;
+
+ if (class2 == FLOAT_REGS
+ && (!TARGET_MFPGPR || !TARGET_POWERPC64
+ || ((mode != DFmode)
+ && (mode != DDmode)
+ && (mode != DImode))))
+ return true;
+
+ if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
+ return true;
+
+ return false;
+}
+
+/* Debug version of rs6000_secondary_memory_needed. */
+static bool
+rs6000_debug_secondary_memory_needed (enum reg_class class1,
+ enum reg_class class2,
+ enum machine_mode mode)
+{
+ bool ret = rs6000_secondary_memory_needed (class1, class2, mode);
+
+ fprintf (stderr,
+ "rs6000_secondary_memory_needed, return: %s, class1 = %s, "
+ "class2 = %s, mode = %s\n",
+ ret ? "true" : "false", reg_class_names[class1],
+ reg_class_names[class2], GET_MODE_NAME (mode));
+
+ return ret;
+}
+
/* Return the register class of a scratch register needed to copy IN into
or out of a register in RCLASS in MODE. If it can be done directly,
NO_REGS is returned. */
-enum reg_class
-rs6000_secondary_reload_class (enum reg_class rclass,
- enum machine_mode mode ATTRIBUTE_UNUSED,
+static enum reg_class
+rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode,
rtx in)
{
int regno;
@@ -12071,6 +12524,83 @@ rs6000_secondary_reload_class (enum reg_class rclass,
/* Otherwise, we need GENERAL_REGS. */
return GENERAL_REGS;
}
+
+/* Tracing wrapper for rs6000_secondary_reload_class: computes the real
+   answer, prints it with RCLASS and MODE on stderr, dumps IN with
+   debug_rtx, and returns the answer unchanged.  */
+static enum reg_class
+rs6000_debug_secondary_reload_class (enum reg_class rclass,
+                                     enum machine_mode mode, rtx in)
+{
+  const enum reg_class scratch_class
+    = rs6000_secondary_reload_class (rclass, mode, in);
+
+  fprintf (stderr,
+           "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
+           "mode = %s, input rtx:\n",
+           reg_class_names[scratch_class],
+           reg_class_names[rclass],
+           GET_MODE_NAME (mode));
+
+  debug_rtx (in);
+
+  return scratch_class;
+}
+
+/* Return true if, for a register of class RCLASS, a mode change from FROM
+   to TO is invalid.  */
+
+static bool
+rs6000_cannot_change_mode_class (enum machine_mode from,
+                                 enum machine_mode to,
+                                 enum reg_class rclass)
+{
+  const unsigned from_size = GET_MODE_SIZE (from);
+  const unsigned to_size = GET_MODE_SIZE (to);
+
+  /* Modes of different size: the change is refused only for classes that
+     overlap the floating point class (VSX_REGS when VSX is enabled), and
+     only if one of the modes is narrower than 8 bytes or TARGET_IEEEQUAD
+     is set.  */
+  if (from_size != to_size)
+    {
+      enum reg_class fp_class = FLOAT_REGS;
+
+      if (TARGET_VSX)
+        fp_class = VSX_REGS;
+
+      if (from_size >= 8 && to_size >= 8 && !TARGET_IEEEQUAD)
+        return false;
+
+      return reg_classes_intersect_p (fp_class, rclass) != 0;
+    }
+
+  /* From here on both modes have the same size.  With E500 double, refuse
+     the change when exactly one of the two modes is DFmode, TFmode, DDmode,
+     TDmode or DImode.  */
+  if (TARGET_E500_DOUBLE)
+    {
+      if ((to == DFmode) != (from == DFmode)
+          || (to == TFmode) != (from == TFmode)
+          || (to == DDmode) != (from == DDmode)
+          || (to == TDmode) != (from == TDmode)
+          || (to == DImode) != (from == DImode))
+        return true;
+    }
+
+  /* Since the VSX register set includes the traditional floating point
+     registers and the altivec registers, only the size is checked here
+     instead of whether the modes are vector modes.  Otherwise DF and DI,
+     say, would not be allowed to change classes.  */
+  if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
+    return !(from_size == 8 || from_size == 16);
+
+  /* In ALTIVEC_REGS, refuse a change where exactly one of the modes is an
+     AltiVec vector mode.  */
+  if (TARGET_ALTIVEC
+      && rclass == ALTIVEC_REGS
+      && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
+    return true;
+
+  /* Likewise for SPE vector modes in classes overlapping GENERAL_REGS.  */
+  if (TARGET_SPE
+      && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
+      && reg_classes_intersect_p (GENERAL_REGS, rclass))
+    return true;
+
+  return false;
+}
+
+/* Tracing wrapper for rs6000_cannot_change_mode_class: computes the real
+   answer, prints it with FROM, TO and RCLASS on stderr, and returns it
+   unchanged.  */
+static bool
+rs6000_debug_cannot_change_mode_class (enum machine_mode from,
+                                       enum machine_mode to,
+                                       enum reg_class rclass)
+{
+  const bool refused = rs6000_cannot_change_mode_class (from, to, rclass);
+  const char *answer = refused ? "true" : "false";
+
+  fprintf (stderr,
+           "rs6000_cannot_change_mode_class, return %s, from = %s, "
+           "to = %s, rclass = %s\n",
+           answer,
+           GET_MODE_NAME (from),
+           GET_MODE_NAME (to),
+           reg_class_names[rclass]);
+
+  return refused;
+}
/* Given a comparison operation, return the bit number in CCR to test. We
know this is a valid comparison.
@@ -12364,7 +12894,7 @@ print_operand (FILE *file, rtx x, int code)
case 'c':
/* X is a CR register. Print the number of the GT bit of the CR. */
if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
- output_operand_lossage ("invalid %%E value");
+ output_operand_lossage ("invalid %%c value");
else
fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 1);
return;
@@ -12801,6 +13331,26 @@ print_operand (FILE *file, rtx x, int code)
fprintf (file, "%d", i + 1);
return;
+ case 'x':
+ /* X is a FPR or Altivec register used in a VSX context. */
+ if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
+ output_operand_lossage ("invalid %%x value");
+ else
+ {
+ int reg = REGNO (x);
+ int vsx_reg = (FP_REGNO_P (reg)
+ ? reg - 32
+ : reg - FIRST_ALTIVEC_REGNO + 32);
+
+#ifdef TARGET_REGNAMES
+ if (TARGET_REGNAMES)
+ fprintf (file, "%%vs%d", vsx_reg);
+ else
+#endif
+ fprintf (file, "%d", vsx_reg);
+ }
+ return;
+
case 'X':
if (GET_CODE (x) == MEM
&& (legitimate_indexed_address_p (XEXP (x, 0), 0)
@@ -12913,13 +13463,16 @@ print_operand (FILE *file, rtx x, int code)
/* Fall through. Must be [reg+reg]. */
}
- if (TARGET_ALTIVEC
+ if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
&& GET_CODE (tmp) == AND
&& GET_CODE (XEXP (tmp, 1)) == CONST_INT
&& INTVAL (XEXP (tmp, 1)) == -16)
tmp = XEXP (tmp, 0);
+ else if (VECTOR_MEM_VSX_P (GET_MODE (x))
+ && GET_CODE (tmp) == PRE_MODIFY)
+ tmp = XEXP (tmp, 1);
if (GET_CODE (tmp) == REG)
- fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
+ fprintf (file, "%s,%s", reg_names[0], reg_names[REGNO (tmp)]);
else
{
if (!GET_CODE (tmp) == PLUS
@@ -18817,7 +19370,8 @@ output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode)
if (GET_CODE (x) == CONST)
{
- gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS);
+ gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
base = XEXP (XEXP (x, 0), 0);
offset = INTVAL (XEXP (XEXP (x, 0), 1));
@@ -19235,15 +19789,15 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
/* Data dependency; DEP_INSN writes a register that INSN reads
some cycles later. */
- /* Separate a load from a narrower, dependent store. */
- if (rs6000_sched_groups
- && GET_CODE (PATTERN (insn)) == SET
- && GET_CODE (PATTERN (dep_insn)) == SET
- && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
- && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
- && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
- > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
- return cost + 14;
+ /* Separate a load from a narrower, dependent store. */
+ if (rs6000_sched_groups
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (PATTERN (dep_insn)) == SET
+ && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
+ && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
+ && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
+ > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
+ return cost + 14;
attr_type = get_attr_type (insn);
@@ -19269,6 +19823,7 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|| rs6000_cpu_attr == CPU_PPC7450
|| rs6000_cpu_attr == CPU_POWER4
|| rs6000_cpu_attr == CPU_POWER5
+ || rs6000_cpu_attr == CPU_POWER7
|| rs6000_cpu_attr == CPU_CELL)
&& recog_memoized (dep_insn)
&& (INSN_CODE (dep_insn) >= 0))
@@ -19283,7 +19838,7 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
case TYPE_FPCOMPARE:
case TYPE_CR_LOGICAL:
case TYPE_DELAYED_CR:
- return cost + 2;
+ return cost + 2;
default:
break;
}
@@ -19328,7 +19883,7 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
if (! store_data_bypass_p (dep_insn, insn))
return 6;
break;
- }
+ }
case TYPE_INTEGER:
case TYPE_COMPARE:
case TYPE_FAST_COMPARE:
@@ -19374,7 +19929,7 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
break;
}
}
- break;
+ break;
case TYPE_LOAD:
case TYPE_LOAD_U:
@@ -19469,7 +20024,7 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
break;
}
- /* Fall out to return default cost. */
+ /* Fall out to return default cost. */
}
break;
@@ -19508,6 +20063,35 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
return cost;
}
+/* Debug version of rs6000_adjust_cost.  Calls rs6000_adjust_cost with the
+   same arguments and returns its result.  When the result differs from the
+   incoming COST, the new and old cost and the kind of dependency recorded
+   in LINK are printed on stderr, followed by a dump of INSN.  */
+
+static int
+rs6000_debug_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+  int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
+
+  /* Only report adjustments that actually changed the cost.  */
+  if (ret != cost)
+    {
+      const char *dep;
+
+      switch (REG_NOTE_KIND (link))
+        {
+        default:             dep = "unknown dependency"; break;
+        case REG_DEP_TRUE:   dep = "data dependency";    break;
+        case REG_DEP_OUTPUT: dep = "output dependency";  break;
+        case REG_DEP_ANTI:   dep = "anti dependency";    break;
+        }
+
+      fprintf (stderr,
+               "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
+               "%s, insn:\n", ret, cost, dep);
+
+      debug_rtx (insn);
+    }
+
+  return ret;
+}
+
/* The function returns a true if INSN is microcoded.
Return false otherwise. */
@@ -19789,6 +20373,7 @@ rs6000_issue_rate (void)
case CPU_POWER4:
case CPU_POWER5:
case CPU_POWER6:
+ case CPU_POWER7:
return 5;
default:
return 1;
@@ -20390,6 +20975,41 @@ insn_must_be_first_in_group (rtx insn)
break;
}
break;
+ case PROCESSOR_POWER7:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_CR_LOGICAL:
+ case TYPE_MFCR:
+ case TYPE_MFCRF:
+ case TYPE_MTCR:
+ case TYPE_IDIV:
+ case TYPE_LDIV:
+ case TYPE_COMPARE:
+ case TYPE_DELAYED_COMPARE:
+ case TYPE_VAR_DELAYED_COMPARE:
+ case TYPE_ISYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_LOAD_U:
+ case TYPE_LOAD_UX:
+ case TYPE_LOAD_EXT:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_STORE_U:
+ case TYPE_STORE_UX:
+ case TYPE_FPLOAD_U:
+ case TYPE_FPLOAD_UX:
+ case TYPE_FPSTORE_U:
+ case TYPE_FPSTORE_UX:
+ case TYPE_MFJMPR:
+ case TYPE_MTJMPR:
+ return true;
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -20451,6 +21071,23 @@ insn_must_be_last_in_group (rtx insn)
break;
}
break;
+ case PROCESSOR_POWER7:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_ISYNC:
+ case TYPE_SYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_STORE_UX:
+ return true;
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -22555,6 +23192,43 @@ rs6000_rtx_costs (rtx x, int code, int outer_code, int *total,
return false;
}
+/* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  It
+   forwards to rs6000_rtx_costs, prints the result, the arguments and the
+   value left in *TOTAL on stderr, dumps X, and returns the result.  */
+
+static bool
+rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int *total,
+                        bool speed)
+{
+  const bool complete = rs6000_rtx_costs (x, code, outer_code, total, speed);
+  const char *result_text = complete ? "complete" : "scan inner";
+  const char *speed_text = speed ? "true" : "false";
+
+  fprintf (stderr,
+           "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
+           "total = %d, speed = %s, x:\n",
+           result_text,
+           GET_RTX_NAME (code),
+           GET_RTX_NAME (outer_code),
+           *total,
+           speed_text);
+
+  debug_rtx (x);
+
+  return complete;
+}
+
+/* Debug form of ADDRESS_COST that is selected if -mdebug=cost.  It
+   evaluates TARGET_ADDRESS_COST for X, prints the cost and SPEED on
+   stderr, dumps X, and returns the cost.  */
+
+static int
+rs6000_debug_address_cost (rtx x, bool speed)
+{
+  const int addr_cost = TARGET_ADDRESS_COST (x, speed);
+  const char *speed_text = speed ? "true" : "false";
+
+  fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
+           addr_cost, speed_text);
+
+  debug_rtx (x);
+
+  return addr_cost;
+}
+
+
/* A C expression returning the cost of moving data from a register of class
CLASS1 to one of CLASS2. */
@@ -22562,6 +23236,8 @@ int
rs6000_register_move_cost (enum machine_mode mode,
enum reg_class from, enum reg_class to)
{
+ int ret;
+
/* Moves from/to GENERAL_REGS. */
if (reg_classes_intersect_p (to, GENERAL_REGS)
|| reg_classes_intersect_p (from, GENERAL_REGS))
@@ -22569,34 +23245,48 @@ rs6000_register_move_cost (enum machine_mode mode,
if (! reg_classes_intersect_p (to, GENERAL_REGS))
from = to;
- if (from == FLOAT_REGS || from == ALTIVEC_REGS)
- return (rs6000_memory_move_cost (mode, from, 0)
- + rs6000_memory_move_cost (mode, GENERAL_REGS, 0));
+ if (from == FLOAT_REGS || from == ALTIVEC_REGS || from == VSX_REGS)
+ ret = (rs6000_memory_move_cost (mode, from, 0)
+ + rs6000_memory_move_cost (mode, GENERAL_REGS, 0));
/* It's more expensive to move CR_REGS than CR0_REGS because of the
shift. */
else if (from == CR_REGS)
- return 4;
+ ret = 4;
/* Power6 has slower LR/CTR moves so make them more expensive than
memory in order to bias spills to memory .*/
else if (rs6000_cpu == PROCESSOR_POWER6
&& reg_classes_intersect_p (from, LINK_OR_CTR_REGS))
- return 6 * hard_regno_nregs[0][mode];
+ ret = 6 * hard_regno_nregs[0][mode];
else
/* A move will cost one instruction per GPR moved. */
- return 2 * hard_regno_nregs[0][mode];
+ ret = 2 * hard_regno_nregs[0][mode];
}
+ /* If we have VSX, we can easily move between FPR or Altivec registers. */
+ else if (VECTOR_UNIT_VSX_P (mode)
+ && reg_classes_intersect_p (to, VSX_REGS)
+ && reg_classes_intersect_p (from, VSX_REGS))
+ ret = 2 * hard_regno_nregs[32][mode];
+
/* Moving between two similar registers is just one instruction. */
else if (reg_classes_intersect_p (to, from))
- return (mode == TFmode || mode == TDmode) ? 4 : 2;
+ ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
/* Everything else has to go through GENERAL_REGS. */
else
- return (rs6000_register_move_cost (mode, GENERAL_REGS, to)
- + rs6000_register_move_cost (mode, from, GENERAL_REGS));
+ ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
+ + rs6000_register_move_cost (mode, from, GENERAL_REGS));
+
+ if (TARGET_DEBUG_COST)
+ fprintf (stderr,
+ "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
+ ret, GET_MODE_NAME (mode), reg_class_names[from],
+ reg_class_names[to]);
+
+ return ret;
}
/* A C expressions returning the cost of moving data of MODE from a register to
@@ -22606,14 +23296,23 @@ int
rs6000_memory_move_cost (enum machine_mode mode, enum reg_class rclass,
int in ATTRIBUTE_UNUSED)
{
+ int ret;
+
if (reg_classes_intersect_p (rclass, GENERAL_REGS))
- return 4 * hard_regno_nregs[0][mode];
+ ret = 4 * hard_regno_nregs[0][mode];
else if (reg_classes_intersect_p (rclass, FLOAT_REGS))
- return 4 * hard_regno_nregs[32][mode];
+ ret = 4 * hard_regno_nregs[32][mode];
else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
- return 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
+ ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
else
- return 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
+ ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
+
+ if (TARGET_DEBUG_COST)
+ fprintf (stderr,
+ "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
+ ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
+
+ return ret;
}
/* Returns a code for a target-specific builtin that implements
@@ -22829,8 +23528,8 @@ rs6000_emit_swrsqrtsf (rtx dst, rtx src)
emit_label (XEXP (label, 0));
}
-/* Emit popcount intrinsic on TARGET_POPCNTB targets. DST is the
- target, and SRC is the argument operand. */
+/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
+ (Power7) targets. DST is the target, and SRC is the argument operand. */
void
rs6000_emit_popcount (rtx dst, rtx src)
@@ -22838,6 +23537,16 @@ rs6000_emit_popcount (rtx dst, rtx src)
enum machine_mode mode = GET_MODE (dst);
rtx tmp1, tmp2;
+ /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
+ if (TARGET_POPCNTD)
+ {
+ if (mode == SImode)
+ emit_insn (gen_popcntwsi2 (dst, src));
+ else
+ emit_insn (gen_popcntddi2 (dst, src));
+ return;
+ }
+
tmp1 = gen_reg_rtx (mode);
if (mode == SImode)
@@ -23254,7 +23963,7 @@ rs6000_vector_mode_supported_p (enum machine_mode mode)
if (TARGET_SPE && SPE_VECTOR_MODE (mode))
return true;
- else if (TARGET_ALTIVEC && ALTIVEC_VECTOR_MODE (mode))
+ else if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode))
return true;
else
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 712ca9b..1c686c2 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1194,6 +1194,7 @@ enum reg_class
GENERAL_REGS,
FLOAT_REGS,
ALTIVEC_REGS,
+ VSX_REGS,
VRSAVE_REGS,
VSCR_REGS,
SPE_ACC_REGS,
@@ -1224,6 +1225,7 @@ enum reg_class
"GENERAL_REGS", \
"FLOAT_REGS", \
"ALTIVEC_REGS", \
+ "VSX_REGS", \
"VRSAVE_REGS", \
"VSCR_REGS", \
"SPE_ACC_REGS", \
@@ -1253,6 +1255,7 @@ enum reg_class
{ 0xffffffff, 0x00000000, 0x00000008, 0x00020000 }, /* GENERAL_REGS */ \
{ 0x00000000, 0xffffffff, 0x00000000, 0x00000000 }, /* FLOAT_REGS */ \
{ 0x00000000, 0x00000000, 0xffffe000, 0x00001fff }, /* ALTIVEC_REGS */ \
+ { 0x00000000, 0xffffffff, 0xffffe000, 0x00001fff }, /* VSX_REGS */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00002000 }, /* VRSAVE_REGS */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00004000 }, /* VSCR_REGS */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00008000 }, /* SPE_ACC_REGS */ \
@@ -1334,20 +1337,14 @@ extern enum reg_class rs6000_vector_reg_class[];
*/
#define PREFERRED_RELOAD_CLASS(X,CLASS) \
- ((CONSTANT_P (X) \
- && reg_classes_intersect_p ((CLASS), FLOAT_REGS)) \
- ? NO_REGS \
- : (GET_MODE_CLASS (GET_MODE (X)) == MODE_INT \
- && (CLASS) == NON_SPECIAL_REGS) \
- ? GENERAL_REGS \
- : (CLASS))
+ rs6000_preferred_reload_class_ptr (X, CLASS)
/* Return the register class of a scratch register needed to copy IN into
or out of a register in CLASS in MODE. If it can be done directly,
NO_REGS is returned. */
#define SECONDARY_RELOAD_CLASS(CLASS,MODE,IN) \
- rs6000_secondary_reload_class (CLASS, MODE, IN)
+ rs6000_secondary_reload_class_ptr (CLASS, MODE, IN)
/* If we are copying between FP or AltiVec registers and anything
else, we need a memory location. The exception is when we are
@@ -1355,18 +1352,7 @@ extern enum reg_class rs6000_vector_reg_class[];
are available.*/
#define SECONDARY_MEMORY_NEEDED(CLASS1,CLASS2,MODE) \
- ((CLASS1) != (CLASS2) && (((CLASS1) == FLOAT_REGS \
- && (!TARGET_MFPGPR || !TARGET_POWERPC64 \
- || ((MODE != DFmode) \
- && (MODE != DDmode) \
- && (MODE != DImode)))) \
- || ((CLASS2) == FLOAT_REGS \
- && (!TARGET_MFPGPR || !TARGET_POWERPC64 \
- || ((MODE != DFmode) \
- && (MODE != DDmode) \
- && (MODE != DImode)))) \
- || (CLASS1) == ALTIVEC_REGS \
- || (CLASS2) == ALTIVEC_REGS))
+ rs6000_secondary_memory_needed_ptr (CLASS1, CLASS2, MODE)
/* For cpus that cannot load/store SDmode values from the 64-bit
FP registers without using a full 64-bit load/store, we need
@@ -1386,19 +1372,7 @@ extern enum reg_class rs6000_vector_reg_class[];
/* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
- (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
- ? ((GET_MODE_SIZE (FROM) < 8 || GET_MODE_SIZE (TO) < 8 \
- || TARGET_IEEEQUAD) \
- && reg_classes_intersect_p (FLOAT_REGS, CLASS)) \
- : (((TARGET_E500_DOUBLE \
- && ((((TO) == DFmode) + ((FROM) == DFmode)) == 1 \
- || (((TO) == TFmode) + ((FROM) == TFmode)) == 1 \
- || (((TO) == DDmode) + ((FROM) == DDmode)) == 1 \
- || (((TO) == TDmode) + ((FROM) == TDmode)) == 1 \
- || (((TO) == DImode) + ((FROM) == DImode)) == 1)) \
- || (TARGET_SPE \
- && (SPE_VECTOR_MODE (FROM) + SPE_VECTOR_MODE (TO)) == 1)) \
- && reg_classes_intersect_p (GENERAL_REGS, CLASS)))
+ rs6000_cannot_change_mode_class_ptr (FROM, TO, CLASS)
/* Stack layout; function entry, exit and calling. */
@@ -1897,7 +1871,7 @@ typedef struct rs6000_args
#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \
do { \
int win; \
- (X) = rs6000_legitimize_reload_address ((X), (MODE), (OPNUM), \
+ (X) = rs6000_legitimize_reload_address_ptr ((X), (MODE), (OPNUM), \
(int)(TYPE), (IND_LEVELS), &win); \
if ( win ) \
goto WIN; \
@@ -1908,7 +1882,7 @@ do { \
#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR,LABEL) \
do { \
- if (rs6000_mode_dependent_address (ADDR)) \
+ if (rs6000_mode_dependent_address_ptr (ADDR)) \
goto LABEL; \
} while (0)
@@ -3162,6 +3136,7 @@ enum rs6000_builtins
RS6000_BUILTIN_RECIP,
RS6000_BUILTIN_RECIPF,
RS6000_BUILTIN_RSQRTF,
+ RS6000_BUILTIN_BSWAP_HI,
RS6000_BUILTIN_COUNT
};
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 9a4079c..64de3dc 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2264,15 +2264,102 @@
DONE;
})
-(define_insn "bswapsi2"
+;; Since the hardware zeros the upper part of the register, save generating the
+;; AND immediate if we are converting to unsigned.
+;; This pattern matches a byte-swapped HImode memory operand that is zero
+;; extended to DImode and emits a single lhbrx for it.
+(define_insn "*bswaphi2_extenddi"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (zero_extend:DI
+ (bswap:HI (match_operand:HI 1 "memory_operand" "Z"))))]
+ "TARGET_POWERPC64"
+ "lhbrx %0,%y1"
+ [(set_attr "length" "4")
+ (set_attr "type" "load")])
+
+;; Byte-swapped HImode load from memory, zero extended into an SImode
+;; register: emitted as a single lhbrx.
+(define_insn "*bswaphi2_extendsi"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (zero_extend:SI
+ (bswap:HI (match_operand:HI 1 "memory_operand" "Z"))))]
+ "TARGET_POWERPC"
+ "lhbrx %0,%y1"
+ [(set_attr "length" "4")
+ (set_attr "type" "load")])
+
+;; Expander for a 16-bit byte swap.  The generated parallel carries an SImode
+;; scratch clobber.  If neither operand is a register, operand 1 is first
+;; forced into a register so that at most one operand is in memory.
+;; NOTE(review): the expander condition is empty, while bswaphi2_internal
+;; and the HImode bswap split both require TARGET_POWERPC -- confirm that
+;; this expander cannot be reached when TARGET_POWERPC is false.
+(define_expand "bswaphi2"
+ [(parallel [(set (match_operand:HI 0 "reg_or_mem_operand" "")
+ (bswap:HI
+ (match_operand:HI 1 "reg_or_mem_operand" "")))
+ (clobber (match_scratch:SI 2 ""))])]
+ ""
+{
+ if (!REG_P (operands[0]) && !REG_P (operands[1]))
+ operands[1] = force_reg (HImode, operands[1]);
+})
+
+;; 16-bit byte swap.  The three alternatives are: memory to register (lhbrx),
+;; register to memory (sthbrx), and register to register.  The last one
+;; outputs "#", i.e. it has to be split, and is the only alternative that
+;; asks for a real register for the SImode scratch.
+(define_insn "bswaphi2_internal"
+ [(set (match_operand:HI 0 "reg_or_mem_operand" "=r,Z,&r")
+ (bswap:HI
+ (match_operand:HI 1 "reg_or_mem_operand" "Z,r,r")))
+ (clobber (match_scratch:SI 2 "=X,X,&r"))]
+ "TARGET_POWERPC"
+ "@
+ lhbrx %0,%y1
+ sthbrx %1,%y0
+ #"
+ [(set_attr "length" "4,4,12")
+ (set_attr "type" "load,store,*")])
+
+;; After reload, split a register-to-register HImode byte swap into three
+;; SImode operations.  Operands 3 and 4 are SImode subregs of the
+;; destination and the source.  The sequence is: an 8-bit zero_extract of
+;; the source into the destination, the source shifted left by 8 and masked
+;; with 0xff00 into the scratch (operand 2), and finally an OR of the
+;; scratch into the destination.
+(define_split
+ [(set (match_operand:HI 0 "gpc_reg_operand" "")
+ (bswap:HI (match_operand:HI 1 "gpc_reg_operand" "")))
+ (clobber (match_operand:SI 2 "gpc_reg_operand" ""))]
+ "TARGET_POWERPC && reload_completed"
+ [(set (match_dup 3)
+ (zero_extract:SI (match_dup 4)
+ (const_int 8)
+ (const_int 16)))
+ (set (match_dup 2)
+ (and:SI (ashift:SI (match_dup 4)
+ (const_int 8))
+ (const_int 65280))) ;; 0xff00
+ (set (match_dup 3)
+ (ior:SI (match_dup 3)
+ (match_dup 2)))]
+ "
+{
+ operands[3] = simplify_gen_subreg (SImode, operands[0], HImode, 0);
+ operands[4] = simplify_gen_subreg (SImode, operands[1], HImode, 0);
+}")
+
+;; Byte-swapped SImode load from memory, zero extended into a DImode
+;; register: emitted as a single lwbrx.
+(define_insn "*bswapsi2_extenddi"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (zero_extend:DI
+ (bswap:SI (match_operand:SI 1 "memory_operand" "Z"))))]
+ "TARGET_POWERPC64"
+ "lwbrx %0,%y1"
+ [(set_attr "length" "4")
+ (set_attr "type" "load")])
+
+;; Expander for a 32-bit byte swap.  If neither operand is a register,
+;; operand 1 is first forced into a register so that at most one operand is
+;; in memory.
+(define_expand "bswapsi2"
+ [(set (match_operand:SI 0 "reg_or_mem_operand" "")
+ (bswap:SI
+ (match_operand:SI 1 "reg_or_mem_operand" "")))]
+ ""
+{
+ if (!REG_P (operands[0]) && !REG_P (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+})
+
+(define_insn "*bswapsi2_internal"
[(set (match_operand:SI 0 "reg_or_mem_operand" "=r,Z,&r")
- (bswap:SI (match_operand:SI 1 "reg_or_mem_operand" "Z,r,r")))]
+ (bswap:SI
+ (match_operand:SI 1 "reg_or_mem_operand" "Z,r,r")))]
""
"@
{lbrx|lwbrx} %0,%y1
{stbrx|stwbrx} %1,%y0
#"
- [(set_attr "length" "4,4,12")])
+ [(set_attr "length" "4,4,12")
+ (set_attr "type" "load,store,*")])
(define_split
[(set (match_operand:SI 0 "gpc_reg_operand" "")
@@ -2291,6 +2378,294 @@
(const_int 16)))]
"")
+;; Expander for a 64-bit byte swap.  The default expansion is a parallel with
+;; three DImode scratch clobbers.  If neither operand is a register, operand
+;; 1 is first forced into a register.  For TARGET_32BIT the bswapdi2_32bit
+;; pattern is emitted instead and the default expansion is skipped.
+(define_expand "bswapdi2"
+ [(parallel [(set (match_operand:DI 0 "reg_or_mem_operand" "")
+ (bswap:DI
+ (match_operand:DI 1 "reg_or_mem_operand" "")))
+ (clobber (match_scratch:DI 2 ""))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (match_scratch:DI 4 ""))])]
+ ""
+{
+ if (!REG_P (operands[0]) && !REG_P (operands[1]))
+ operands[1] = force_reg (DImode, operands[1]);
+
+ if (TARGET_32BIT)
+ {
+ /* 32-bit needs fewer scratch registers. */
+ emit_insn (gen_bswapdi2_32bit (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+;; Power7/cell has ldbrx/stdbrx, so use it directly.
+;; Alternatives: memory to register (ldbrx), register to memory (stdbrx),
+;; and register to register, which outputs "#" (must be split) and is the
+;; only alternative that asks for real registers for the three scratches.
+(define_insn "*bswapdi2_ldbrx"
+ [(set (match_operand:DI 0 "reg_or_mem_operand" "=&r,Z,??&r")
+ (bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r")))
+ (clobber (match_scratch:DI 2 "=X,X,&r"))
+ (clobber (match_scratch:DI 3 "=X,X,&r"))
+ (clobber (match_scratch:DI 4 "=X,X,&r"))]
+ "TARGET_POWERPC64 && TARGET_LDBRX
+ && (REG_P (operands[0]) || REG_P (operands[1]))"
+ "@
+ ldbrx %0,%y1
+ stdbrx %1,%y0
+ #"
+ [(set_attr "length" "4,4,36")
+ (set_attr "type" "load,store,*")])
+
+;; Non-power7/cell, fall back to use lwbrx/stwbrx.
+;; Every alternative (load, store, register to register) outputs "#", so
+;; the insn always has to be split.  At least one operand must be a register.
+(define_insn "*bswapdi2_64bit"
+ [(set (match_operand:DI 0 "reg_or_mem_operand" "=&r,Z,??&r")
+ (bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r")))
+ (clobber (match_scratch:DI 2 "=&b,&b,&r"))
+ (clobber (match_scratch:DI 3 "=&b,&r,&r"))
+ (clobber (match_scratch:DI 4 "=&b,X,&r"))]
+ "TARGET_POWERPC64 && !TARGET_LDBRX
+ && (REG_P (operands[0]) || REG_P (operands[1]))"
+ "#"
+ [(set_attr "length" "16,12,36")])
+
+;; After reload, split a 64-bit byte-swapped load from memory when ldbrx is
+;; not available.  Each 32-bit word is loaded byte-reversed with bswapsi2
+;; into the low half of a scratch (operands 3 and 4) and the two are then
+;; combined with a shift and an OR.  Operand 2 is used to form the address
+;; of the second word.
+(define_split
+  [(set (match_operand:DI 0 "gpc_reg_operand" "")
+        (bswap:DI (match_operand:DI 1 "indexed_or_indirect_operand" "")))
+   (clobber (match_operand:DI 2 "gpc_reg_operand" ""))
+   (clobber (match_operand:DI 3 "gpc_reg_operand" ""))
+   (clobber (match_operand:DI 4 "gpc_reg_operand" ""))]
+  "TARGET_POWERPC64 && !TARGET_LDBRX && reload_completed"
+  [(const_int 0)]
+  "
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx op2 = operands[2];
+  rtx op3 = operands[3];
+  rtx op4 = operands[4];
+  rtx op3_32 = simplify_gen_subreg (SImode, op3, DImode, 4);
+  rtx op4_32 = simplify_gen_subreg (SImode, op4, DImode, 4);
+  rtx addr1;
+  rtx addr2;
+  rtx word_high;
+  rtx word_low;
+
+  /* ADDR1 addresses the first word and has to remain the complete original
+     address.  ADDR2 addresses the word 4 bytes further on: for a sum, 4 is
+     added to the first term in OP2 and the second term is kept as the
+     other half of the new sum; otherwise OP2 is simply set to 4.  */
+  addr1 = XEXP (src, 0);
+  if (GET_CODE (addr1) == PLUS)
+    {
+      emit_insn (gen_adddi3 (op2, XEXP (addr1, 0), GEN_INT (4)));
+      addr2 = gen_rtx_PLUS (DImode, op2, XEXP (addr1, 1));
+    }
+  else
+    {
+      emit_move_insn (op2, GEN_INT (4));
+      addr2 = gen_rtx_PLUS (DImode, op2, addr1);
+    }
+
+  if (BYTES_BIG_ENDIAN)
+    {
+      word_high = change_address (src, SImode, addr1);
+      word_low = change_address (src, SImode, addr2);
+    }
+  else
+    {
+      word_high = change_address (src, SImode, addr2);
+      word_low = change_address (src, SImode, addr1);
+    }
+
+  emit_insn (gen_bswapsi2 (op3_32, word_low));
+  emit_insn (gen_bswapsi2 (op4_32, word_high));
+  emit_insn (gen_ashldi3 (dest, op3, GEN_INT (32)));
+  emit_insn (gen_iordi3 (dest, dest, op4));
+
+  /* Everything was emitted by hand; do not emit the placeholder pattern.  */
+  DONE;
+}")
+
+;; After reload, split a 64-bit byte-swapped store to memory when stdbrx is
+;; not available.  The upper half of the source is shifted down into operand
+;; 3, and both 32-bit halves are stored byte-reversed with bswapsi2.
+;; Operand 2 is used to form the address of the second word.
+(define_split
+  [(set (match_operand:DI 0 "indexed_or_indirect_operand" "")
+        (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "")))
+   (clobber (match_operand:DI 2 "gpc_reg_operand" ""))
+   (clobber (match_operand:DI 3 "gpc_reg_operand" ""))
+   (clobber (match_operand:DI 4 "" ""))]
+  "TARGET_POWERPC64 && reload_completed && !TARGET_LDBRX"
+  [(const_int 0)]
+  "
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx op2 = operands[2];
+  rtx op3 = operands[3];
+  rtx src_si = simplify_gen_subreg (SImode, src, DImode, 4);
+  rtx op3_si = simplify_gen_subreg (SImode, op3, DImode, 4);
+  rtx addr1;
+  rtx addr2;
+  rtx word_high;
+  rtx word_low;
+
+  /* ADDR1 addresses the first word and has to remain the complete original
+     address.  ADDR2 addresses the word 4 bytes further on: for a sum, 4 is
+     added to the first term in OP2 and the second term is kept as the
+     other half of the new sum; otherwise OP2 is simply set to 4.  */
+  addr1 = XEXP (dest, 0);
+  if (GET_CODE (addr1) == PLUS)
+    {
+      emit_insn (gen_adddi3 (op2, XEXP (addr1, 0), GEN_INT (4)));
+      addr2 = gen_rtx_PLUS (DImode, op2, XEXP (addr1, 1));
+    }
+  else
+    {
+      emit_move_insn (op2, GEN_INT (4));
+      addr2 = gen_rtx_PLUS (DImode, op2, addr1);
+    }
+
+  emit_insn (gen_lshrdi3 (op3, src, GEN_INT (32)));
+  if (BYTES_BIG_ENDIAN)
+    {
+      word_high = change_address (dest, SImode, addr1);
+      word_low = change_address (dest, SImode, addr2);
+      emit_insn (gen_bswapsi2 (word_high, src_si));
+      emit_insn (gen_bswapsi2 (word_low, op3_si));
+    }
+  else
+    {
+      word_high = change_address (dest, SImode, addr2);
+      word_low = change_address (dest, SImode, addr1);
+      emit_insn (gen_bswapsi2 (word_low, src_si));
+      emit_insn (gen_bswapsi2 (word_high, op3_si));
+    }
+
+  /* Everything was emitted by hand; do not emit the placeholder pattern.  */
+  DONE;
+}")
+
+;; After reload, split a 64-bit register-to-register byte swap.  The upper
+;; half of the source is shifted down into operand 2, each 32-bit half is
+;; byte-swapped with bswapsi2, and the results are combined with a shift
+;; and an OR.
+(define_split
+  [(set (match_operand:DI 0 "gpc_reg_operand" "")
+        (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "")))
+   (clobber (match_operand:DI 2 "gpc_reg_operand" ""))
+   (clobber (match_operand:DI 3 "gpc_reg_operand" ""))
+   (clobber (match_operand:DI 4 "" ""))]
+  "TARGET_POWERPC64 && reload_completed"
+  [(const_int 0)]
+  "
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx op2 = operands[2];
+  rtx op3 = operands[3];
+  rtx dest_si = simplify_gen_subreg (SImode, dest, DImode, 4);
+  rtx src_si = simplify_gen_subreg (SImode, src, DImode, 4);
+  rtx op2_si = simplify_gen_subreg (SImode, op2, DImode, 4);
+  rtx op3_si = simplify_gen_subreg (SImode, op3, DImode, 4);
+
+  emit_insn (gen_lshrdi3 (op2, src, GEN_INT (32)));
+  emit_insn (gen_bswapsi2 (dest_si, src_si));
+  emit_insn (gen_bswapsi2 (op3_si, op2_si));
+  emit_insn (gen_ashldi3 (dest, dest, GEN_INT (32)));
+  emit_insn (gen_iordi3 (dest, dest, op3));
+
+  /* Everything was emitted by hand; do not emit the placeholder pattern.  */
+  DONE;
+}")
+
+;; 64-bit byte swap for TARGET_32BIT.  Every alternative (load, store,
+;; register to register) outputs "#", so the insn always has to be split.
+;; Only the two memory alternatives ask for a real register for the SImode
+;; scratch.  At least one operand must be a register.
+(define_insn "bswapdi2_32bit"
+ [(set (match_operand:DI 0 "reg_or_mem_operand" "=&r,Z,??&r")
+ (bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r")))
+ (clobber (match_scratch:SI 2 "=&b,&b,X"))]
+ "TARGET_32BIT && (REG_P (operands[0]) || REG_P (operands[1]))"
+ "#"
+ [(set_attr "length" "16,12,36")])
+
+;; After reload, split a 64-bit byte-swapped load from memory for
+;; TARGET_32BIT.  Each 32-bit word is loaded byte-reversed with bswapsi2
+;; straight into the opposite half of the destination.  Operand 2 is an
+;; SImode scratch used to form the address of the second word.
+(define_split
+  [(set (match_operand:DI 0 "gpc_reg_operand" "")
+        (bswap:DI (match_operand:DI 1 "indexed_or_indirect_operand" "")))
+   (clobber (match_operand:SI 2 "gpc_reg_operand" ""))]
+  "TARGET_32BIT && reload_completed"
+  [(const_int 0)]
+  "
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx op2 = operands[2];
+  rtx dest_hi = simplify_gen_subreg (SImode, dest, DImode, 0);
+  rtx dest_lo = simplify_gen_subreg (SImode, dest, DImode, 4);
+  rtx addr1;
+  rtx addr2;
+  rtx word_high;
+  rtx word_low;
+
+  /* ADDR1 addresses the first word and has to remain the complete original
+     address.  ADDR2 addresses the word 4 bytes further on: for a sum, 4 is
+     added to the first term in OP2 and the second term is kept as the
+     other half of the new sum; otherwise OP2 is simply set to 4.  OP2 is
+     an SImode register, so the addition and the new address are SImode.  */
+  addr1 = XEXP (src, 0);
+  if (GET_CODE (addr1) == PLUS)
+    {
+      emit_insn (gen_addsi3 (op2, XEXP (addr1, 0), GEN_INT (4)));
+      addr2 = gen_rtx_PLUS (SImode, op2, XEXP (addr1, 1));
+    }
+  else
+    {
+      emit_move_insn (op2, GEN_INT (4));
+      addr2 = gen_rtx_PLUS (SImode, op2, addr1);
+    }
+
+  if (BYTES_BIG_ENDIAN)
+    {
+      word_high = change_address (src, SImode, addr1);
+      word_low = change_address (src, SImode, addr2);
+    }
+  else
+    {
+      word_high = change_address (src, SImode, addr2);
+      word_low = change_address (src, SImode, addr1);
+    }
+
+  emit_insn (gen_bswapsi2 (dest_hi, word_low));
+  emit_insn (gen_bswapsi2 (dest_lo, word_high));
+
+  /* Everything was emitted by hand; do not emit the placeholder pattern.  */
+  DONE;
+}")
+
+;; After reload, split a 64-bit byte-swapped store to memory for
+;; TARGET_32BIT.  Each 32-bit half of the source is stored byte-reversed
+;; with bswapsi2 into the opposite word of the destination.  Operand 2 is an
+;; SImode scratch used to form the address of the second word.
+(define_split
+  [(set (match_operand:DI 0 "indexed_or_indirect_operand" "")
+        (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "")))
+   (clobber (match_operand:SI 2 "gpc_reg_operand" ""))]
+  "TARGET_32BIT && reload_completed"
+  [(const_int 0)]
+  "
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx op2 = operands[2];
+  rtx src_high = simplify_gen_subreg (SImode, src, DImode, 0);
+  rtx src_low = simplify_gen_subreg (SImode, src, DImode, 4);
+  rtx addr1;
+  rtx addr2;
+  rtx word_high;
+  rtx word_low;
+
+  /* ADDR1 addresses the first word and has to remain the complete original
+     address.  ADDR2 addresses the word 4 bytes further on: for a sum, 4 is
+     added to the first term in OP2 and the second term is kept as the
+     other half of the new sum; otherwise OP2 is simply set to 4.  */
+  addr1 = XEXP (dest, 0);
+  if (GET_CODE (addr1) == PLUS)
+    {
+      emit_insn (gen_addsi3 (op2, XEXP (addr1, 0), GEN_INT (4)));
+      addr2 = gen_rtx_PLUS (SImode, op2, XEXP (addr1, 1));
+    }
+  else
+    {
+      emit_move_insn (op2, GEN_INT (4));
+      addr2 = gen_rtx_PLUS (SImode, op2, addr1);
+    }
+
+  if (BYTES_BIG_ENDIAN)
+    {
+      word_high = change_address (dest, SImode, addr1);
+      word_low = change_address (dest, SImode, addr2);
+    }
+  else
+    {
+      word_high = change_address (dest, SImode, addr2);
+      word_low = change_address (dest, SImode, addr1);
+    }
+
+  emit_insn (gen_bswapsi2 (word_high, src_low));
+  emit_insn (gen_bswapsi2 (word_low, src_high));
+
+  /* Everything was emitted by hand; do not emit the placeholder pattern.  */
+  DONE;
+}")
+
+;; After reload, split a 64-bit register-to-register byte swap for
+;; TARGET_32BIT: each 32-bit half of the source is byte-swapped with
+;; bswapsi2 into the opposite half of the destination.  The scratch
+;; (operand 2) is not used.
+(define_split
+  [(set (match_operand:DI 0 "gpc_reg_operand" "")
+        (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "")))
+   (clobber (match_operand:SI 2 "" ""))]
+  "TARGET_32BIT && reload_completed"
+  [(const_int 0)]
+  "
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx src_high = simplify_gen_subreg (SImode, src, DImode, 0);
+  rtx src_low = simplify_gen_subreg (SImode, src, DImode, 4);
+  rtx dest_high = simplify_gen_subreg (SImode, dest, DImode, 0);
+  rtx dest_low = simplify_gen_subreg (SImode, dest, DImode, 4);
+
+  emit_insn (gen_bswapsi2 (dest_high, src_low));
+  emit_insn (gen_bswapsi2 (dest_low, src_high));
+
+  /* Everything was emitted by hand; do not emit the placeholder pattern.  */
+  DONE;
+}")
+
(define_expand "mulsi3"
[(use (match_operand:SI 0 "gpc_reg_operand" ""))
(use (match_operand:SI 1 "gpc_reg_operand" ""))