From 511dcace074c36f4c8d7fcf835ba795fa82268c0 Mon Sep 17 00:00:00 2001 From: Vladimir Makarov Date: Wed, 9 Jan 2013 17:02:11 +0000 Subject: PR rtl-optimization/pr55829 2013-01-09 Vladimir Makarov PR rtl-optimization/pr55829 * lra-constraints.c (match_reload): Add code for absent output. (curr_insn_transform): Add code for reloads of matched inputs without output. 2013-01-09 Vladimir Makarov PR rtl-optimization/pr55829 * gcc.target/i386/pr55829.c: New. From-SVN: r195057 --- gcc/ChangeLog | 7 ++++++ gcc/lra-constraints.c | 44 ++++++++++++++++++++++----------- gcc/testsuite/ChangeLog | 5 ++++ gcc/testsuite/gcc.target/i386/pr55829.c | 34 +++++++++++++++++++++++++ 4 files changed, 76 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr55829.c (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6377fe5..cbdc1ad 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2013-01-09 Vladimir Makarov + + PR rtl-optimization/pr55829 + * lra-constraints.c (match_reload): Add code for absent output. + (curr_insn_transform): Add code for reloads of matched inputs + without output. + 2013-01-09 Uros Bizjak * config/i386/sse.md (*vec_interleave_highv2df): Change mode diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index f6c6c89..fcece42 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -658,8 +658,9 @@ narrow_reload_pseudo_class (rtx reg, enum reg_class cl) /* Generate reloads for matching OUT and INS (array of input operand numbers with end marker -1) with reg class GOAL_CLASS. Add input - and output reloads correspondingly to the lists *BEFORE and - *AFTER. */ + and output reloads correspondingly to the lists *BEFORE and *AFTER. + OUT might be negative. In this case we generate input reloads for + matched input operands INS. */ static void match_reload (signed char out, signed char *ins, enum reg_class goal_class, rtx *before, rtx *after) @@ -668,10 +669,10 @@ match_reload (signed char out, signed char *ins, enum reg_class goal_class, rtx new_in_reg, new_out_reg, reg, clobber; enum machine_mode inmode, outmode; rtx in_rtx = *curr_id->operand_loc[ins[0]]; - rtx out_rtx = *curr_id->operand_loc[out]; + rtx out_rtx = out < 0 ? in_rtx : *curr_id->operand_loc[out]; - outmode = curr_operand_mode[out]; inmode = curr_operand_mode[ins[0]]; + outmode = out < 0 ? inmode : curr_operand_mode[out]; push_to_sequence (*before); if (inmode != outmode) { @@ -746,14 +747,13 @@ match_reload (signed char out, signed char *ins, enum reg_class goal_class, = lra_create_new_reg_with_unique_value (outmode, out_rtx, goal_class, ""); } - /* In and out operand can be got from transformations before - processing insn constraints. One example of such transformations - is subreg reloading (see function simplify_operand_subreg). The - new pseudos created by the transformations might have inaccurate + /* In operand can be got from transformations before processing insn + constraints. One example of such transformations is subreg + reloading (see function simplify_operand_subreg). The new + pseudos created by the transformations might have inaccurate class (ALL_REGS) and we should make their classes more accurate. */ narrow_reload_pseudo_class (in_rtx, goal_class); - narrow_reload_pseudo_class (out_rtx, goal_class); lra_emit_move (copy_rtx (new_in_reg), in_rtx); *before = get_insns (); end_sequence (); @@ -765,6 +765,10 @@ match_reload (signed char out, signed char *ins, enum reg_class goal_class, *curr_id->operand_loc[in] = new_in_reg; } lra_update_dups (curr_id, ins); + if (out < 0) + return; + /* See a comment for the input operand above. */ + narrow_reload_pseudo_class (out_rtx, goal_class); if (find_reg_note (curr_insn, REG_UNUSED, out_rtx) == NULL_RTX) { start_sequence (); @@ -2597,6 +2601,7 @@ curr_insn_transform (void) int n_alternatives; int commutative; signed char goal_alt_matched[MAX_RECOG_OPERANDS][MAX_RECOG_OPERANDS]; + signed char match_inputs[MAX_RECOG_OPERANDS + 1]; rtx before, after; bool alt_p = false; /* Flag that the insn has been changed through a transformation. */ @@ -3052,17 +3057,28 @@ curr_insn_transform (void) && (curr_static_id->operand[goal_alt_matched[i][0]].type == OP_OUT)) { - signed char arr[2]; - - arr[0] = i; - arr[1] = -1; - match_reload (goal_alt_matched[i][0], arr, + /* generate reloads for input and matched outputs. */ + match_inputs[0] = i; + match_inputs[1] = -1; + match_reload (goal_alt_matched[i][0], match_inputs, goal_alt[i], &before, &after); } else if (curr_static_id->operand[i].type == OP_OUT && (curr_static_id->operand[goal_alt_matched[i][0]].type == OP_IN)) + /* Generate reloads for output and matched inputs. */ match_reload (i, goal_alt_matched[i], goal_alt[i], &before, &after); + else if (curr_static_id->operand[i].type == OP_IN + && (curr_static_id->operand[goal_alt_matched[i][0]].type + == OP_IN)) + { + /* Generate reloads for matched inputs. */ + match_inputs[0] = i; + for (j = 0; (k = goal_alt_matched[i][j]) >= 0; j++) + match_inputs[j + 1] = k; + match_inputs[j + 1] = -1; + match_reload (-1, match_inputs, goal_alt[i], &before, &after); + } else /* We must generate code in any case when function process_alt_operands decides that it is possible. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index bbb51fb..b545fe1 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2013-01-09 Vladimir Makarov + + PR rtl-optimization/pr55829 + * gcc.target/i386/pr55829.c: New. + 2013-01-09 Tobias Burnus PR fortran/55758 diff --git a/gcc/testsuite/gcc.target/i386/pr55829.c b/gcc/testsuite/gcc.target/i386/pr55829.c new file mode 100644 index 0000000..be70ba2f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr55829.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse3 -fno-expensive-optimizations" } */ + +typedef double __m128d __attribute__ ((__vector_size__ (16))); + +extern double p1[]; +extern double p2[]; +extern double ck[]; +extern int n; + +__attribute__((__noinline__, __noclone__)) int chk_pd (double *v1, double *v2) +{ + return v2[n] != v1[n]; +} + +static inline void sse3_test_movddup_reg_subsume_ldsd (double *i1, double *r) +{ + __m128d t1 = (__m128d){*i1, 0}; + __m128d t2 = __builtin_ia32_shufpd (t1, t1, 0); + __builtin_ia32_storeupd (r, t2); +} + +int sse3_test (void) +{ + int i = 0; + int fail = 0; + for (; i < 80; i += 1) + { + ck[0] = p1[0]; + fail += chk_pd (ck, p2); + sse3_test_movddup_reg_subsume_ldsd (p1, p2); + } + return fail; +} -- cgit v1.1