aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog56
-rw-r--r--gcc/config/i386/i386.c388
-rw-r--r--gcc/testsuite/ChangeLog28
-rw-r--r--gcc/testsuite/gcc.target/i386/pr55247-2.c5
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-10.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-11.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-12.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-13.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-14.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-15.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-16.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-17.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-18.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-19.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-2.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-20.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-21.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-22.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-3.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-4.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-5.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-6.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-7.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-8.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/pr70155-9.c17
26 files changed, 794 insertions, 39 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9f068ce..71c7d1f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,59 @@
+2016-04-27 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/70155
+ * config/i386/i386.c (scalar_to_vector_candidate_p): Renamed
+ to ...
+ (dimode_scalar_to_vector_candidate_p): This.
+ (timode_scalar_to_vector_candidate_p): New function.
+ (scalar_to_vector_candidate_p): Likewise.
+ (timode_check_non_convertible_regs): Likewise.
+ (timode_remove_non_convertible_regs): Likewise.
+ (remove_non_convertible_regs): Likewise.
+ (remove_non_convertible_regs): Renamed to ...
+ (dimode_remove_non_convertible_regs): This.
+ (scalar_chain::~scalar_chain): Make it virtual.
+ (scalar_chain::compute_convert_gain): Make it pure virtual.
+ (scalar_chain::mark_dual_mode_def): Likewise.
+ (scalar_chain::convert_insn): Likewise.
+ (scalar_chain::convert_registers): Likewise.
+ (scalar_chain::add_to_queue): Make it protected.
+ (scalar_chain::emit_conversion_insns): Likewise.
+ (scalar_chain::replace_with_subreg): Likewise.
+ (scalar_chain::replace_with_subreg_in_insn): Likewise.
+ (scalar_chain::convert_op): Likewise.
+ (scalar_chain::convert_reg): Likewise.
+ (scalar_chain::make_vector_copies): Likewise.
+ (scalar_chain::convert_registers): New pure virtual function.
+ (class dimode_scalar_chain): New class.
+ (class timode_scalar_chain): Likewise.
+ (scalar_chain::mark_dual_mode_def): Renamed to ...
+ (dimode_scalar_chain::mark_dual_mode_def): This.
+ (timode_scalar_chain::mark_dual_mode_def): New function.
+ (timode_scalar_chain::convert_insn): Likewise.
+ (dimode_scalar_chain::convert_registers): Likewise.
+ (scalar_chain::compute_convert_gain): Renamed to ...
+ (dimode_scalar_chain::compute_convert_gain): This.
+ (scalar_chain::replace_with_subreg): Renamed to ...
+ (dimode_scalar_chain::replace_with_subreg): This.
+ (scalar_chain::replace_with_subreg_in_insn): Renamed to ...
+ (dimode_scalar_chain::replace_with_subreg_in_insn): This.
+ (scalar_chain::make_vector_copies): Renamed to ...
+ (dimode_scalar_chain::make_vector_copies): This.
+ (scalar_chain::convert_reg): Renamed to ...
+ (dimode_scalar_chain::convert_reg ): This.
+ (scalar_chain::convert_op): Renamed to ...
+ (dimode_scalar_chain::convert_op): This.
+ (scalar_chain::convert_insn): Renamed to ...
+ (dimode_scalar_chain::convert_insn): This.
+ (scalar_chain::convert): Call convert_registers.
+ (convert_scalars_to_vector): Change to scalar_chain pointer to
+ use timode_scalar_chain in 64-bit mode and dimode_scalar_chain
+ in 32-bit mode. Delete scalar_chain pointer. Call
+ free_dominance_info in 64-bit mode.
+ (pass_stv::gate): Remove TARGET_64BIT check.
+ (ix86_option_override): Put the 64-bit STV pass before the CSE
+ pass.
+
2016-04-27 Pierre-Marie de Rodat <derodat@adacore.com>
* dwarf2out.h (struct dw_loc_descr_node): Remove the
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d8278fb..fa23a26 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2769,11 +2769,10 @@ convertible_comparison_p (rtx_insn *insn)
return true;
}
-/* Return 1 if INSN may be converted into vector
- instruction. */
+/* The DImode version of scalar_to_vector_candidate_p. */
static bool
-scalar_to_vector_candidate_p (rtx_insn *insn)
+dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
rtx def_set = single_set (insn);
@@ -2833,16 +2832,75 @@ scalar_to_vector_candidate_p (rtx_insn *insn)
return true;
}
-/* For a given bitmap of insn UIDs scans all instruction and
- remove insn from CANDIDATES in case it has both convertible
- and not convertible definitions.
+/* The TImode version of scalar_to_vector_candidate_p. */
- All insns in a bitmap are conversion candidates according to
- scalar_to_vector_candidate_p. Currently it implies all insns
- are single_set. */
+static bool
+timode_scalar_to_vector_candidate_p (rtx_insn *insn)
+{
+ rtx def_set = single_set (insn);
+
+ if (!def_set)
+ return false;
+
+ if (has_non_address_hard_reg (insn))
+ return false;
+
+ rtx src = SET_SRC (def_set);
+ rtx dst = SET_DEST (def_set);
+
+ /* Only TImode load and store are allowed. */
+ if (GET_MODE (dst) != TImode)
+ return false;
+
+ if (MEM_P (dst))
+ {
+ /* Check for store. Only support store from register or standard
+ SSE constants. Memory must be aligned or unaligned store is
+ optimal. */
+ if (misaligned_operand (dst, TImode)
+ && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
+ return false;
+
+ switch (GET_CODE (src))
+ {
+ default:
+ return false;
+
+ case REG:
+ return true;
+
+ case CONST_INT:
+ return standard_sse_constant_p (src, TImode);
+ }
+ }
+ else if (MEM_P (src))
+ {
+ /* Check for load. Memory must be aligned or unaligned load is
+ optimal. */
+ return (REG_P (dst)
+ && (!misaligned_operand (src, TImode)
+ || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
+ }
+
+ return false;
+}
+
+/* Return 1 if INSN may be converted into vector
+ instruction. */
+
+static bool
+scalar_to_vector_candidate_p (rtx_insn *insn)
+{
+ if (TARGET_64BIT)
+ return timode_scalar_to_vector_candidate_p (insn);
+ else
+ return dimode_scalar_to_vector_candidate_p (insn);
+}
+
+/* The DImode version of remove_non_convertible_regs. */
static void
-remove_non_convertible_regs (bitmap candidates)
+dimode_remove_non_convertible_regs (bitmap candidates)
{
bitmap_iterator bi;
unsigned id;
@@ -2893,11 +2951,132 @@ remove_non_convertible_regs (bitmap candidates)
BITMAP_FREE (regs);
}
+/* For a register REGNO, scan instructions for its defs and uses.
+ Put REGNO in REGS if a def or use isn't in CANDIDATES. */
+
+static void
+timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
+ unsigned int regno)
+{
+ for (df_ref def = DF_REG_DEF_CHAIN (regno);
+ def;
+ def = DF_REF_NEXT_REG (def))
+ {
+ if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "r%d has non convertible def in insn %d\n",
+ regno, DF_REF_INSN_UID (def));
+
+ bitmap_set_bit (regs, regno);
+ break;
+ }
+ }
+
+ for (df_ref ref = DF_REG_USE_CHAIN (regno);
+ ref;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ /* Debug instructions are skipped. */
+ if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
+ && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "r%d has non convertible use in insn %d\n",
+ regno, DF_REF_INSN_UID (ref));
+
+ bitmap_set_bit (regs, regno);
+ break;
+ }
+ }
+}
+
+/* The TImode version of remove_non_convertible_regs. */
+
+static void
+timode_remove_non_convertible_regs (bitmap candidates)
+{
+ bitmap_iterator bi;
+ unsigned id;
+ bitmap regs = BITMAP_ALLOC (NULL);
+
+ EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
+ {
+ rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
+ rtx dest = SET_DEST (def_set);
+ rtx src = SET_SRC (def_set);
+
+ if ((!REG_P (dest)
+ || bitmap_bit_p (regs, REGNO (dest))
+ || HARD_REGISTER_P (dest))
+ && (!REG_P (src)
+ || bitmap_bit_p (regs, REGNO (src))
+ || HARD_REGISTER_P (src)))
+ continue;
+
+ if (REG_P (dest))
+ timode_check_non_convertible_regs (candidates, regs,
+ REGNO (dest));
+
+ if (REG_P (src))
+ timode_check_non_convertible_regs (candidates, regs,
+ REGNO (src));
+ }
+
+ EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
+ {
+ for (df_ref def = DF_REG_DEF_CHAIN (id);
+ def;
+ def = DF_REF_NEXT_REG (def))
+ if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Removing insn %d from candidates list\n",
+ DF_REF_INSN_UID (def));
+
+ bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
+ }
+
+ for (df_ref ref = DF_REG_USE_CHAIN (id);
+ ref;
+ ref = DF_REF_NEXT_REG (ref))
+ if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Removing insn %d from candidates list\n",
+ DF_REF_INSN_UID (ref));
+
+ bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
+ }
+ }
+
+ BITMAP_FREE (regs);
+}
+
+/* For a given bitmap of insn UIDs scans all instruction and
+ remove insn from CANDIDATES in case it has both convertible
+ and not convertible definitions.
+
+ All insns in a bitmap are conversion candidates according to
+ scalar_to_vector_candidate_p. Currently it implies all insns
+ are single_set. */
+
+static void
+remove_non_convertible_regs (bitmap candidates)
+{
+ if (TARGET_64BIT)
+ timode_remove_non_convertible_regs (candidates);
+ else
+ dimode_remove_non_convertible_regs (candidates);
+}
+
class scalar_chain
{
public:
scalar_chain ();
- ~scalar_chain ();
+ virtual ~scalar_chain ();
static unsigned max_id;
@@ -2913,21 +3092,47 @@ class scalar_chain
bitmap defs_conv;
void build (bitmap candidates, unsigned insn_uid);
- int compute_convert_gain ();
+ virtual int compute_convert_gain () = 0;
int convert ();
+ protected:
+ void add_to_queue (unsigned insn_uid);
+ void emit_conversion_insns (rtx insns, rtx_insn *pos);
+
private:
void add_insn (bitmap candidates, unsigned insn_uid);
- void add_to_queue (unsigned insn_uid);
- void mark_dual_mode_def (df_ref def);
void analyze_register_chain (bitmap candidates, df_ref ref);
+ virtual void mark_dual_mode_def (df_ref def) = 0;
+ virtual void convert_insn (rtx_insn *insn) = 0;
+ virtual void convert_registers () = 0;
+};
+
+class dimode_scalar_chain : public scalar_chain
+{
+ public:
+ int compute_convert_gain ();
+ private:
+ void mark_dual_mode_def (df_ref def);
rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
- void emit_conversion_insns (rtx insns, rtx_insn *pos);
void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
void convert_insn (rtx_insn *insn);
void convert_op (rtx *op, rtx_insn *insn);
void convert_reg (unsigned regno);
void make_vector_copies (unsigned regno);
+ void convert_registers ();
+};
+
+class timode_scalar_chain : public scalar_chain
+{
+ public:
+ /* Convert from TImode to V1TImode is always faster. */
+ int compute_convert_gain () { return 1; }
+
+ private:
+ void mark_dual_mode_def (df_ref def);
+ void convert_insn (rtx_insn *insn);
+ /* We don't convert registers to difference size. */
+ void convert_registers () {}
};
unsigned scalar_chain::max_id = 0;
@@ -2973,10 +3178,11 @@ scalar_chain::add_to_queue (unsigned insn_uid)
bitmap_set_bit (queue, insn_uid);
}
-/* Mark register defined by DEF as requiring conversion. */
+/* For DImode conversion, mark register defined by DEF as requiring
+ conversion. */
void
-scalar_chain::mark_dual_mode_def (df_ref def)
+dimode_scalar_chain::mark_dual_mode_def (df_ref def)
{
gcc_assert (DF_REF_REG_DEF_P (def));
@@ -2991,6 +3197,14 @@ scalar_chain::mark_dual_mode_def (df_ref def)
bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
}
+/* For TImode conversion, it is unused. */
+
+void
+timode_scalar_chain::mark_dual_mode_def (df_ref)
+{
+ gcc_unreachable ();
+}
+
/* Check REF's chain to add new insns into a queue
and find registers requiring conversion. */
@@ -3117,7 +3331,7 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid)
/* Compute a gain for chain conversion. */
int
-scalar_chain::compute_convert_gain ()
+dimode_scalar_chain::compute_convert_gain ()
{
bitmap_iterator bi;
unsigned insn_uid;
@@ -3174,7 +3388,7 @@ scalar_chain::compute_convert_gain ()
/* Replace REG in X with a V2DI subreg of NEW_REG. */
rtx
-scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
+dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
{
if (x == reg)
return gen_rtx_SUBREG (V2DImode, new_reg, 0);
@@ -3197,7 +3411,8 @@ scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
/* Replace REG in INSN with a V2DI subreg of NEW_REG. */
void
-scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx new_reg)
+dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
+ rtx reg, rtx new_reg)
{
replace_with_subreg (single_set (insn), reg, new_reg);
}
@@ -3227,7 +3442,7 @@ scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
and replace its uses in a chain. */
void
-scalar_chain::make_vector_copies (unsigned regno)
+dimode_scalar_chain::make_vector_copies (unsigned regno)
{
rtx reg = regno_reg_rtx[regno];
rtx vreg = gen_reg_rtx (DImode);
@@ -3298,7 +3513,7 @@ scalar_chain::make_vector_copies (unsigned regno)
in case register is used in not convertible insn. */
void
-scalar_chain::convert_reg (unsigned regno)
+dimode_scalar_chain::convert_reg (unsigned regno)
{
bool scalar_copy = bitmap_bit_p (defs_conv, regno);
rtx reg = regno_reg_rtx[regno];
@@ -3390,7 +3605,7 @@ scalar_chain::convert_reg (unsigned regno)
registers conversion. */
void
-scalar_chain::convert_op (rtx *op, rtx_insn *insn)
+dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
*op = copy_rtx_if_shared (*op);
@@ -3434,7 +3649,7 @@ scalar_chain::convert_op (rtx *op, rtx_insn *insn)
/* Convert INSN to vector mode. */
void
-scalar_chain::convert_insn (rtx_insn *insn)
+dimode_scalar_chain::convert_insn (rtx_insn *insn)
{
rtx def_set = single_set (insn);
rtx src = SET_SRC (def_set);
@@ -3511,6 +3726,89 @@ scalar_chain::convert_insn (rtx_insn *insn)
df_insn_rescan (insn);
}
+/* Convert INSN from TImode to V1T1mode. */
+
+void
+timode_scalar_chain::convert_insn (rtx_insn *insn)
+{
+ rtx def_set = single_set (insn);
+ rtx src = SET_SRC (def_set);
+ rtx dst = SET_DEST (def_set);
+
+ switch (GET_CODE (dst))
+ {
+ case REG:
+ {
+ rtx tmp = find_reg_equal_equiv_note (insn);
+ if (tmp)
+ PUT_MODE (XEXP (tmp, 0), V1TImode);
+ }
+ /* FALLTHRU */
+ case MEM:
+ PUT_MODE (dst, V1TImode);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (GET_CODE (src))
+ {
+ case REG:
+ case MEM:
+ PUT_MODE (src, V1TImode);
+ break;
+
+ case CONST_INT:
+ switch (standard_sse_constant_p (src, TImode))
+ {
+ case 1:
+ src = CONST0_RTX (GET_MODE (dst));
+ break;
+ case 2:
+ src = CONSTM1_RTX (GET_MODE (dst));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (NONDEBUG_INSN_P (insn))
+ {
+ rtx tmp = gen_reg_rtx (V1TImode);
+ /* Since there are no instructions to store standard SSE
+ constant, temporary register usage is required. */
+ emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
+ dst = tmp;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ SET_SRC (def_set) = src;
+ SET_DEST (def_set) = dst;
+
+ /* Drop possible dead definitions. */
+ PATTERN (insn) = def_set;
+
+ INSN_CODE (insn) = -1;
+ recog_memoized (insn);
+ df_insn_rescan (insn);
+}
+
+void
+dimode_scalar_chain::convert_registers ()
+{
+ bitmap_iterator bi;
+ unsigned id;
+
+ EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
+ convert_reg (id);
+
+ EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
+ make_vector_copies (id);
+}
+
/* Convert whole chain creating required register
conversions and copies. */
@@ -3527,11 +3825,7 @@ scalar_chain::convert ()
if (dump_file)
fprintf (dump_file, "Converting chain #%d...\n", chain_id);
- EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
- convert_reg (id);
-
- EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
- make_vector_copies (id);
+ convert_registers ();
EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
{
@@ -3588,19 +3882,26 @@ convert_scalars_to_vector ()
while (!bitmap_empty_p (candidates))
{
unsigned uid = bitmap_first_set_bit (candidates);
- scalar_chain chain;
+ scalar_chain *chain;
+
+ if (TARGET_64BIT)
+ chain = new timode_scalar_chain;
+ else
+ chain = new dimode_scalar_chain;
/* Find instructions chain we want to convert to vector mode.
Check all uses and definitions to estimate all required
conversions. */
- chain.build (candidates, uid);
+ chain->build (candidates, uid);
- if (chain.compute_convert_gain () > 0)
- converted_insns += chain.convert ();
+ if (chain->compute_convert_gain () > 0)
+ converted_insns += chain->convert ();
else
if (dump_file)
fprintf (dump_file, "Chain #%d conversion is not profitable\n",
- chain.chain_id);
+ chain->chain_id);
+
+ delete chain;
}
if (dump_file)
@@ -3610,6 +3911,13 @@ convert_scalars_to_vector ()
bitmap_obstack_release (NULL);
df_process_deferred_rescans ();
+ /* FIXME: Since the CSE pass may change dominance info, which isn't
+ expected by the fwprop pass, call free_dominance_info to
+ invalidate dominance info. Otherwise, the fwprop pass may crash
+ when dominance info is changed. */
+ if (TARGET_64BIT)
+ free_dominance_info (CDI_DOMINATORS);
+
/* Conversion means we may have 128bit register spills/fills
which require aligned stack. */
if (converted_insns)
@@ -3683,7 +3991,7 @@ public:
/* opt_pass methods: */
virtual bool gate (function *)
{
- return !TARGET_64BIT && TARGET_STV && TARGET_SSE2 && optimize > 1;
+ return TARGET_STV && TARGET_SSE2 && optimize > 1;
}
virtual unsigned int execute (function *)
@@ -5596,17 +5904,23 @@ ix86_option_override (void)
1, PASS_POS_INSERT_AFTER
};
opt_pass *pass_stv = make_pass_stv (g);
- struct register_pass_info stv_info
+ struct register_pass_info stv_info_dimode
= { pass_stv, "combine",
1, PASS_POS_INSERT_AFTER
};
+ /* Run the 64-bit STV pass before the CSE pass so that CONST0_RTX and
+ CONSTM1_RTX generated by the STV pass can be CSEed. */
+ struct register_pass_info stv_info_timode
+ = { pass_stv, "cse2",
+ 1, PASS_POS_INSERT_BEFORE
+ };
ix86_option_override_internal (true, &global_options, &global_options_set);
/* This needs to be done at start up. It's convenient to do it here. */
register_pass (&insert_vzeroupper_info);
- register_pass (&stv_info);
+ register_pass (TARGET_64BIT ? &stv_info_timode : &stv_info_dimode);
}
/* Implement the TARGET_OFFLOAD_OPTIONS hook. */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 0b69c6c..2aada78 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,33 @@
2016-04-27 H.J. Lu <hongjiu.lu@intel.com>
+ PR target/70155
+ * gcc.target/i386/pr55247-2.c: Updated to check movti_internal
+ and movv1ti_internal patterns
+ * gcc.target/i386/pr70155-1.c: New test.
+ * gcc.target/i386/pr70155-2.c: Likewise.
+ * gcc.target/i386/pr70155-3.c: Likewise.
+ * gcc.target/i386/pr70155-4.c: Likewise.
+ * gcc.target/i386/pr70155-5.c: Likewise.
+ * gcc.target/i386/pr70155-6.c: Likewise.
+ * gcc.target/i386/pr70155-7.c: Likewise.
+ * gcc.target/i386/pr70155-8.c: Likewise.
+ * gcc.target/i386/pr70155-9.c: Likewise.
+ * gcc.target/i386/pr70155-10.c: Likewise.
+ * gcc.target/i386/pr70155-11.c: Likewise.
+ * gcc.target/i386/pr70155-12.c: Likewise.
+ * gcc.target/i386/pr70155-13.c: Likewise.
+ * gcc.target/i386/pr70155-14.c: Likewise.
+ * gcc.target/i386/pr70155-15.c: Likewise.
+ * gcc.target/i386/pr70155-16.c: Likewise.
+ * gcc.target/i386/pr70155-17.c: Likewise.
+ * gcc.target/i386/pr70155-18.c: Likewise.
+ * gcc.target/i386/pr70155-19.c: Likewise.
+ * gcc.target/i386/pr70155-20.c: Likewise.
+ * gcc.target/i386/pr70155-21.c: Likewise.
+ * gcc.target/i386/pr70155-22.c: Likewise.
+
+2016-04-27 H.J. Lu <hongjiu.lu@intel.com>
+
* gcc.target/i386/avx512vl-vmovdqa64-1.c: Replace ".{5}" with
".{5,6}".
diff --git a/gcc/testsuite/gcc.target/i386/pr55247-2.c b/gcc/testsuite/gcc.target/i386/pr55247-2.c
index 6b5b36d..77901af 100644
--- a/gcc/testsuite/gcc.target/i386/pr55247-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr55247-2.c
@@ -1,6 +1,6 @@
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-require-effective-target maybe_x32 } */
-/* { dg-options "-O2 -mx32 -mtune=generic -maddress-mode=long" } */
+/* { dg-options "-O2 -mx32 -mtune=generic -maddress-mode=long -dp" } */
typedef unsigned int uint32_t;
typedef uint32_t Elf32_Word;
@@ -34,4 +34,5 @@ _dl_profile_fixup (struct link_map *l, Elf32_Word reloc_arg)
symbind32 (&sym);
}
-/* { dg-final { scan-assembler-not "%xmm\[0-9\]" } } */
+/* { dg-final { scan-assembler-times "movv1ti_internal" 2 } } */
+/* { dg-final { scan-assembler-not "movti_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-1.c b/gcc/testsuite/gcc.target/i386/pr70155-1.c
new file mode 100644
index 0000000..3500364
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+extern __int128 a, b;
+
+void
+foo (void)
+{
+ a = b;
+}
+
+/* { dg-final { scan-assembler-times "movv1ti_internal" 2 } } */
+/* { dg-final { scan-assembler-not "\\*movdi_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-10.c b/gcc/testsuite/gcc.target/i386/pr70155-10.c
new file mode 100644
index 0000000..2d0b91f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-10.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=core2 -dp" } */
+
+extern __int128 a;
+
+struct foo
+{
+ __int128 i;
+}__attribute__ ((packed));
+
+extern struct foo x;
+
+void
+foo (void)
+{
+ a = x.i;
+}
+
+/* { dg-final { scan-assembler-not "movv1ti_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-11.c b/gcc/testsuite/gcc.target/i386/pr70155-11.c
new file mode 100644
index 0000000..b00aa13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-11.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=core2 -dp" } */
+
+extern __int128 a;
+
+struct foo
+{
+ __int128 i;
+}__attribute__ ((packed));
+
+extern struct foo x;
+
+void
+foo (void)
+{
+ x.i = a;
+}
+
+/* { dg-final { scan-assembler-not "movv1ti_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-12.c b/gcc/testsuite/gcc.target/i386/pr70155-12.c
new file mode 100644
index 0000000..dd0edf0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-12.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=core2 -dp" } */
+
+struct foo
+{
+ __int128 i;
+}__attribute__ ((packed));
+
+extern struct foo x;
+
+void
+foo (void)
+{
+ x.i = 0;
+}
+
+/* { dg-final { scan-assembler-not "movv1ti_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-13.c b/gcc/testsuite/gcc.target/i386/pr70155-13.c
new file mode 100644
index 0000000..6718290
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-13.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=core2 -dp" } */
+
+struct foo
+{
+ __int128 i;
+}__attribute__ ((packed));
+
+extern struct foo x;
+
+void
+foo (void)
+{
+ x.i = -1;
+}
+
+/* { dg-final { scan-assembler-not "movv1ti_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-14.c b/gcc/testsuite/gcc.target/i386/pr70155-14.c
new file mode 100644
index 0000000..a43de2e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-14.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+struct foo
+{
+ __int128 i;
+}__attribute__ ((packed));
+
+extern struct foo x;
+
+void
+foo (void)
+{
+ x.i = 2;
+}
+
+/* { dg-final { scan-assembler-not "movv1ti_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-15.c b/gcc/testsuite/gcc.target/i386/pr70155-15.c
new file mode 100644
index 0000000..e9cafcc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-15.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=core2 -mtune-ctrl=sse_unaligned_store_optimal -dp" } */
+
+struct foo
+{
+ __int128 i;
+}__attribute__ ((packed));
+
+extern struct foo x;
+
+void
+foo (void)
+{
+ x.i = 0;
+}
+
+/* { dg-final { scan-assembler-times "movv1ti_internal" 2 } } */
+/* { dg-final { scan-assembler-not "\\*movdi_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-16.c b/gcc/testsuite/gcc.target/i386/pr70155-16.c
new file mode 100644
index 0000000..7750b58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-16.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=core2 -mtune-ctrl=sse_unaligned_load_optimal -dp" } */
+
+struct foo
+{
+ __int128 i;
+}__attribute__ ((packed));
+
+extern struct foo x;
+
+void
+foo (void)
+{
+ x.i = 0;
+}
+
+/* { dg-final { scan-assembler-not "movv1ti_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-17.c b/gcc/testsuite/gcc.target/i386/pr70155-17.c
new file mode 100644
index 0000000..a9427e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-17.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+extern __int128 a, b, c, d, e, f;
+
+void
+foo (void)
+{
+ a = 0;
+ b = -1;
+ c = 0;
+ d = -1;
+ e = 0;
+ f = -1;
+}
+
+/* { dg-final { scan-assembler-times "movv1ti_internal" 8 } } */
+/* { dg-final { scan-assembler-not "\\*movdi_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-18.c b/gcc/testsuite/gcc.target/i386/pr70155-18.c
new file mode 100644
index 0000000..eb9db68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-18.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+extern char *src, *dst;
+
+char *
+foo1 (void)
+{
+ return __builtin_memcpy (dst, src, 16);
+}
+
+/* { dg-final { scan-assembler-times "movv1ti_internal" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-19.c b/gcc/testsuite/gcc.target/i386/pr70155-19.c
new file mode 100644
index 0000000..e2e73aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-19.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+extern char *src, *dst;
+
+char *
+foo1 (void)
+{
+ return __builtin_mempcpy (dst, src, 16);
+}
+
+/* { dg-final { scan-assembler-times "movv1ti_internal" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-2.c b/gcc/testsuite/gcc.target/i386/pr70155-2.c
new file mode 100644
index 0000000..af2ddc6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+struct foo
+{
+ __int128 i;
+}__attribute__ ((packed));
+
+extern struct foo x, y;
+
+void
+foo (void)
+{
+ x = y;
+}
+
+/* { dg-final { scan-assembler-times "movv1ti_internal" 2 } } */
+/* { dg-final { scan-assembler-not "\\*movdi_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-20.c b/gcc/testsuite/gcc.target/i386/pr70155-20.c
new file mode 100644
index 0000000..10b8c45
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-20.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+extern __int128 a, b;
+
+__int128
+foo (void)
+{
+ a = b;
+ return b;
+}
+
+/* { dg-final { scan-assembler-not "movv1ti_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-21.c b/gcc/testsuite/gcc.target/i386/pr70155-21.c
new file mode 100644
index 0000000..be76e5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-21.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+extern __int128 a, b, c;
+
+void
+foo (void)
+{
+ a = b;
+ c = a + 1;
+}
+
+/* { dg-final { scan-assembler-not "movv1ti_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-22.c b/gcc/testsuite/gcc.target/i386/pr70155-22.c
new file mode 100644
index 0000000..ff5cbce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-22.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+extern __int128 a, b, c;
+
+void
+foo (void)
+{
+ a = b;
+ c++;
+}
+
+/* { dg-final { scan-assembler-times "movv1ti_internal" 2 } } */
+/* { dg-final { scan-assembler-not "\\*movdi_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-3.c b/gcc/testsuite/gcc.target/i386/pr70155-3.c
new file mode 100644
index 0000000..01b38aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-3.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+extern __int128 a;
+
+struct foo
+{
+ __int128 i;
+}__attribute__ ((packed));
+
+extern struct foo x;
+
+void
+foo (void)
+{
+ a = x.i;
+}
+
+/* { dg-final { scan-assembler-times "movv1ti_internal" 2 } } */
+/* { dg-final { scan-assembler-not "\\*movdi_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-4.c b/gcc/testsuite/gcc.target/i386/pr70155-4.c
new file mode 100644
index 0000000..31bc0a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-4.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+extern __int128 a;
+
+struct foo
+{
+ __int128 i;
+}__attribute__ ((packed));
+
+extern struct foo x;
+
+void
+foo (void)
+{
+ x.i = a;
+}
+
+/* { dg-final { scan-assembler-times "movv1ti_internal" 2 } } */
+/* { dg-final { scan-assembler-not "\\*movdi_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-5.c b/gcc/testsuite/gcc.target/i386/pr70155-5.c
new file mode 100644
index 0000000..9647452
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-5.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+extern __int128 a;
+
+void
+foo (void)
+{
+ a = 0;
+}
+
+/* { dg-final { scan-assembler-times "movv1ti_internal" 2 } } */
+/* { dg-final { scan-assembler-not "\\*movdi_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-6.c b/gcc/testsuite/gcc.target/i386/pr70155-6.c
new file mode 100644
index 0000000..7e074a73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-6.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+extern __int128 a;
+
+void
+foo (void)
+{
+ a = -1;
+}
+
+/* { dg-final { scan-assembler-times "movv1ti_internal" 2 } } */
+/* { dg-final { scan-assembler-not "\\*movdi_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-7.c b/gcc/testsuite/gcc.target/i386/pr70155-7.c
new file mode 100644
index 0000000..93c6fc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-7.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+struct foo
+{
+ __int128 i;
+}__attribute__ ((packed));
+
+extern struct foo x;
+
+void
+foo (void)
+{
+ x.i = 0;
+}
+
+/* { dg-final { scan-assembler-times "movv1ti_internal" 2 } } */
+/* { dg-final { scan-assembler-not "\\*movdi_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-8.c b/gcc/testsuite/gcc.target/i386/pr70155-8.c
new file mode 100644
index 0000000..f304a4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-8.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=generic -dp" } */
+
+struct foo
+{
+ __int128 i;
+}__attribute__ ((packed));
+
+extern struct foo x;
+
+void
+foo (void)
+{
+ x.i = -1;
+}
+
+/* { dg-final { scan-assembler-times "movv1ti_internal" 2 } } */
+/* { dg-final { scan-assembler-not "\\*movdi_internal" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr70155-9.c b/gcc/testsuite/gcc.target/i386/pr70155-9.c
new file mode 100644
index 0000000..5dc3a76
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70155-9.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mtune=core2 -dp" } */
+
+struct foo
+{
+ __int128 i;
+}__attribute__ ((packed));
+
+extern struct foo x, y;
+
+void
+foo (void)
+{
+ x = y;
+}
+
+/* { dg-final { scan-assembler-not "movv1ti_internal" } } */