aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2017-04-04 19:52:27 +0200
committerJakub Jelinek <jakub@gcc.gnu.org>2017-04-04 19:52:27 +0200
commitd16e2ddd2666efed093ce826228c786715823cb4 (patch)
tree90baf0fa1babd2f5a7a6862baa9740fc718762aa
parente2b2c4b9fbb1c5ee3c8756e6b6ecc6e490fad035 (diff)
downloadgcc-d16e2ddd2666efed093ce826228c786715823cb4.zip
gcc-d16e2ddd2666efed093ce826228c786715823cb4.tar.gz
gcc-d16e2ddd2666efed093ce826228c786715823cb4.tar.bz2
re PR tree-optimization/79390 (10% performance drop in SciMark2 LU after r242550)
PR tree-optimization/79390 * target.h (struct noce_if_info): Declare. * targhooks.h (default_noce_conversion_profitable_p): Declare. * target.def (noce_conversion_profitable_p): New target hook. * ifcvt.h (struct noce_if_info): New type, moved from ... * ifcvt.c (struct noce_if_info): ... here. (noce_conversion_profitable_p): Renamed to ... (default_noce_conversion_profitable_p): ... this. No longer static nor inline. (noce_try_store_flag_constants, noce_try_addcc, noce_try_store_flag_mask, noce_try_cmove, noce_try_cmove_arith, noce_convert_multiple_sets): Use targetm.noce_conversion_profitable_p instead of noce_conversion_profitable_p. * config/i386/i386.c: Include ifcvt.h. (ix86_option_override_internal): Don't override PARAM_MAX_RTL_IF_CONVERSION_INSNS default. (ix86_noce_conversion_profitable_p): New function. (TARGET_NOCE_CONVERSION_PROFITABLE_P): Redefine. * config/i386/x86-tune.def (X86_TUNE_ONE_IF_CONV_INSN): Adjust comment. * doc/tm.texi.in (TARGET_NOCE_CONVERSION_PROFITABLE_P): Add. * doc/tm.texi: Regenerated. * gcc.target/i386/pr79390.c: New test. * gcc.dg/ifcvt-4.c: Use -mtune-ctrl=^one_if_conv_insn for i?86/x86_64. From-SVN: r246686
-rw-r--r--gcc/ChangeLog24
-rw-r--r--gcc/config/i386/i386.c47
-rw-r--r--gcc/config/i386/x86-tune.def4
-rw-r--r--gcc/doc/tm.texi6
-rw-r--r--gcc/doc/tm.texi.in2
-rw-r--r--gcc/ifcvt.c97
-rw-r--r--gcc/ifcvt.h70
-rw-r--r--gcc/target.def10
-rw-r--r--gcc/target.h3
-rw-r--r--gcc/targhooks.h2
-rw-r--r--gcc/testsuite/ChangeLog6
-rw-r--r--gcc/testsuite/gcc.dg/ifcvt-4.c1
-rw-r--r--gcc/testsuite/gcc.target/i386/pr79390.c28
13 files changed, 209 insertions, 91 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d60dd0b..d537ac5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,27 @@
+2017-04-04 Jakub Jelinek <jakub@redhat.com>
+
+ PR tree-optimization/79390
+ * target.h (struct noce_if_info): Declare.
+ * targhooks.h (default_noce_conversion_profitable_p): Declare.
+ * target.def (noce_conversion_profitable_p): New target hook.
+ * ifcvt.h (struct noce_if_info): New type, moved from ...
+ * ifcvt.c (struct noce_if_info): ... here.
+ (noce_conversion_profitable_p): Renamed to ...
+ (default_noce_conversion_profitable_p): ... this. No longer
+ static nor inline.
+ (noce_try_store_flag_constants, noce_try_addcc,
+ noce_try_store_flag_mask, noce_try_cmove, noce_try_cmove_arith,
+ noce_convert_multiple_sets): Use targetm.noce_conversion_profitable_p
+ instead of noce_conversion_profitable_p.
+ * config/i386/i386.c: Include ifcvt.h.
+ (ix86_option_override_internal): Don't override
+ PARAM_MAX_RTL_IF_CONVERSION_INSNS default.
+ (ix86_noce_conversion_profitable_p): New function.
+ (TARGET_NOCE_CONVERSION_PROFITABLE_P): Redefine.
+ * config/i386/x86-tune.def (X86_TUNE_ONE_IF_CONV_INSN): Adjust comment.
+ * doc/tm.texi.in (TARGET_NOCE_CONVERSION_PROFITABLE_P): Add.
+ * doc/tm.texi: Regenerated.
+
2017-04-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* doc/extend.texi (PowerPC AltiVec Built-in Functions): Grammar
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 118ab50..96faffd 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. If not see
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "intl.h"
+#include "ifcvt.h"
/* This file should be included last. */
#include "target-def.h"
@@ -6104,13 +6105,6 @@ ix86_option_override_internal (bool main_args_p,
opts->x_param_values,
opts_set->x_param_values);
- /* Restrict number of if-converted SET insns to 1. */
- if (TARGET_ONE_IF_CONV_INSN)
- maybe_set_param_value (PARAM_MAX_RTL_IF_CONVERSION_INSNS,
- 1,
- opts->x_param_values,
- opts_set->x_param_values);
-
/* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
if (opts->x_flag_prefetch_loop_arrays < 0
&& HAVE_prefetch
@@ -50629,6 +50623,41 @@ ix86_max_noce_ifcvt_seq_cost (edge e)
return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
}
+/* Return true if SEQ is a good candidate as a replacement for the
+ if-convertible sequence described in IF_INFO. */
+
+static bool
+ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
+{
+ if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
+ {
+ int cmov_cnt = 0;
+ /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
+ Maybe we should allow even more conditional moves as long as they
+ are used far enough not to stall the CPU, or also consider
+ IF_INFO->TEST_BB succ edge probabilities. */
+ for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
+ {
+ rtx set = single_set (insn);
+ if (!set)
+ continue;
+ if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
+ continue;
+ rtx src = SET_SRC (set);
+ enum machine_mode mode = GET_MODE (src);
+ if (GET_MODE_CLASS (mode) != MODE_INT
+ && GET_MODE_CLASS (mode) != MODE_FLOAT)
+ continue;
+ if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
+ || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
+ continue;
+ /* insn is CMOV or FCMOV. */
+ if (++cmov_cnt > 1)
+ return false;
+ }
+ }
+ return default_noce_conversion_profitable_p (seq, if_info);
+}
/* Implement targetm.vectorize.init_cost. */
@@ -52181,6 +52210,10 @@ ix86_run_selftests (void)
#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
+
+#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
+#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
+
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 19b7dae..c642f45 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -547,7 +547,7 @@ DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", 0U)
the unroll factor so that the unrolled loop fits the loop buffer. */
DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
-/* X86_TUNE_ONE_IF_CONV_INSNS: Restrict a number of set insns to be
- if-converted to one. */
+/* X86_TUNE_ONE_IF_CONV_INSNS: Restrict a number of cmov insns in
+ if-converted sequence to one. */
DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
m_SILVERMONT | m_KNL | m_INTEL | m_CORE_ALL | m_GENERIC)
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index d59a4b9..c4f2c89 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6644,6 +6644,12 @@ The default implementation of this hook uses the
and uses a multiple of @code{BRANCH_COST} otherwise.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_NOCE_CONVERSION_PROFITABLE_P (rtx_insn *@var{seq}, struct noce_if_info *@var{if_info})
+This hook returns true if the instruction sequence @code{seq} is a good
+candidate as a replacement for the if-convertible sequence described in
+@code{if_info}.
+@end deftypefn
+
@deftypefn {Target Hook} bool TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P (void)
This predicate controls the use of the eager delay slot filler to disallow
speculatively executed instructions being placed in delay slots. Targets
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 88c12dd..1c471d8 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4796,6 +4796,8 @@ Define this macro if a non-short-circuit operation produced by
@hook TARGET_MAX_NOCE_IFCVT_SEQ_COST
+@hook TARGET_NOCE_CONVERSION_PROFITABLE_P
+
@hook TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P
@node Scheduling
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 4fd1744..e51ccab 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -760,76 +760,6 @@ cond_exec_process_if_block (ce_if_block * ce_info,
return FALSE;
}
-/* Used by noce_process_if_block to communicate with its subroutines.
-
- The subroutines know that A and B may be evaluated freely. They
- know that X is a register. They should insert new instructions
- before cond_earliest. */
-
-struct noce_if_info
-{
- /* The basic blocks that make up the IF-THEN-{ELSE-,}JOIN block. */
- basic_block test_bb, then_bb, else_bb, join_bb;
-
- /* The jump that ends TEST_BB. */
- rtx_insn *jump;
-
- /* The jump condition. */
- rtx cond;
-
- /* Reversed jump condition. */
- rtx rev_cond;
-
- /* New insns should be inserted before this one. */
- rtx_insn *cond_earliest;
-
- /* Insns in the THEN and ELSE block. There is always just this
- one insns in those blocks. The insns are single_set insns.
- If there was no ELSE block, INSN_B is the last insn before
- COND_EARLIEST, or NULL_RTX. In the former case, the insn
- operands are still valid, as if INSN_B was moved down below
- the jump. */
- rtx_insn *insn_a, *insn_b;
-
- /* The SET_SRC of INSN_A and INSN_B. */
- rtx a, b;
-
- /* The SET_DEST of INSN_A. */
- rtx x;
-
- /* The original set destination that the THEN and ELSE basic blocks finally
- write their result to. */
- rtx orig_x;
- /* True if this if block is not canonical. In the canonical form of
- if blocks, the THEN_BB is the block reached via the fallthru edge
- from TEST_BB. For the noce transformations, we allow the symmetric
- form as well. */
- bool then_else_reversed;
-
- /* True if the contents of then_bb and else_bb are a
- simple single set instruction. */
- bool then_simple;
- bool else_simple;
-
- /* True if we're optimisizing the control block for speed, false if
- we're optimizing for size. */
- bool speed_p;
-
- /* An estimate of the original costs. When optimizing for size, this is the
- combined cost of COND, JUMP and the costs for THEN_BB and ELSE_BB.
- When optimizing for speed, we use the costs of COND plus the minimum of
- the costs for THEN_BB and ELSE_BB, as computed in the next field. */
- unsigned int original_cost;
-
- /* Maximum permissible cost for the unconditional sequence we should
- generate to replace this branch. */
- unsigned int max_seq_cost;
-
- /* The name of the noce transform that succeeded in if-converting
- this structure. Used for debugging. */
- const char *transform_name;
-};
-
static rtx noce_emit_store_flag (struct noce_if_info *, rtx, int, int);
static int noce_try_move (struct noce_if_info *);
static int noce_try_ifelse_collapse (struct noce_if_info *);
@@ -857,11 +787,14 @@ noce_reversed_cond_code (struct noce_if_info *if_info)
return reversed_comparison_code (if_info->cond, if_info->jump);
}
-/* Return TRUE if SEQ is a good candidate as a replacement for the
- if-convertible sequence described in IF_INFO. */
+/* Return true if SEQ is a good candidate as a replacement for the
+ if-convertible sequence described in IF_INFO.
+ This is the default implementation that targets can override
+ through a target hook. */
-inline static bool
-noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
+bool
+default_noce_conversion_profitable_p (rtx_insn *seq,
+ struct noce_if_info *if_info)
{
bool speed_p = if_info->speed_p;
@@ -1544,7 +1477,7 @@ noce_try_store_flag_constants (struct noce_if_info *if_info)
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
- if (!seq || !noce_conversion_profitable_p (seq, if_info))
+ if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
@@ -1605,7 +1538,7 @@ noce_try_addcc (struct noce_if_info *if_info)
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
- if (!seq || !noce_conversion_profitable_p (seq, if_info))
+ if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
@@ -1647,7 +1580,7 @@ noce_try_addcc (struct noce_if_info *if_info)
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
- if (!seq || !noce_conversion_profitable_p (seq, if_info))
+ if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
@@ -1698,7 +1631,7 @@ noce_try_store_flag_mask (struct noce_if_info *if_info)
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
- if (!seq || !noce_conversion_profitable_p (seq, if_info))
+ if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
@@ -1850,7 +1783,7 @@ noce_try_cmove (struct noce_if_info *if_info)
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
- if (!seq || !noce_conversion_profitable_p (seq, if_info))
+ if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
@@ -1903,7 +1836,7 @@ noce_try_cmove (struct noce_if_info *if_info)
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
- if (!seq || !noce_conversion_profitable_p (seq, if_info))
+ if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
@@ -2345,7 +2278,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
noce_emit_move_insn (x, target);
ifcvt_seq = end_ifcvt_sequence (if_info);
- if (!ifcvt_seq || !noce_conversion_profitable_p (ifcvt_seq, if_info))
+ if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq, if_info))
return FALSE;
emit_insn_before_setloc (ifcvt_seq, if_info->jump,
@@ -3308,7 +3241,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
/* Actually emit the sequence if it isn't too expensive. */
rtx_insn *seq = get_insns ();
- if (!noce_conversion_profitable_p (seq, if_info))
+ if (!targetm.noce_conversion_profitable_p (seq, if_info))
{
end_sequence ();
return FALSE;
diff --git a/gcc/ifcvt.h b/gcc/ifcvt.h
index e6276bb..1935de5e 100644
--- a/gcc/ifcvt.h
+++ b/gcc/ifcvt.h
@@ -40,4 +40,74 @@ struct ce_if_block
int pass; /* Pass number. */
};
+/* Used by noce_process_if_block to communicate with its subroutines.
+
+ The subroutines know that A and B may be evaluated freely. They
+ know that X is a register. They should insert new instructions
+ before cond_earliest. */
+
+struct noce_if_info
+{
+ /* The basic blocks that make up the IF-THEN-{ELSE-,}JOIN block. */
+ basic_block test_bb, then_bb, else_bb, join_bb;
+
+ /* The jump that ends TEST_BB. */
+ rtx_insn *jump;
+
+ /* The jump condition. */
+ rtx cond;
+
+ /* Reversed jump condition. */
+ rtx rev_cond;
+
+ /* New insns should be inserted before this one. */
+ rtx_insn *cond_earliest;
+
+ /* Insns in the THEN and ELSE block. There is always just this
+ one insns in those blocks. The insns are single_set insns.
+ If there was no ELSE block, INSN_B is the last insn before
+ COND_EARLIEST, or NULL_RTX. In the former case, the insn
+ operands are still valid, as if INSN_B was moved down below
+ the jump. */
+ rtx_insn *insn_a, *insn_b;
+
+ /* The SET_SRC of INSN_A and INSN_B. */
+ rtx a, b;
+
+ /* The SET_DEST of INSN_A. */
+ rtx x;
+
+ /* The original set destination that the THEN and ELSE basic blocks finally
+ write their result to. */
+ rtx orig_x;
+ /* True if this if block is not canonical. In the canonical form of
+ if blocks, the THEN_BB is the block reached via the fallthru edge
+ from TEST_BB. For the noce transformations, we allow the symmetric
+ form as well. */
+ bool then_else_reversed;
+
+ /* True if the contents of then_bb and else_bb are a
+ simple single set instruction. */
+ bool then_simple;
+ bool else_simple;
+
+ /* True if we're optimisizing the control block for speed, false if
+ we're optimizing for size. */
+ bool speed_p;
+
+ /* An estimate of the original costs. When optimizing for size, this is the
+ combined cost of COND, JUMP and the costs for THEN_BB and ELSE_BB.
+ When optimizing for speed, we use the costs of COND plus the minimum of
+ the costs for THEN_BB and ELSE_BB, as computed in the next field. */
+ unsigned int original_cost;
+
+ /* Maximum permissible cost for the unconditional sequence we should
+ generate to replace this branch. */
+ unsigned int max_seq_cost;
+
+ /* The name of the noce transform that succeeded in if-converting
+ this structure. Used for debugging. */
+ const char *transform_name;
+};
+
#endif /* GCC_IFCVT_H */
diff --git a/gcc/target.def b/gcc/target.def
index 43600ae..6bebfd5 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -3660,6 +3660,16 @@ and uses a multiple of @code{BRANCH_COST} otherwise.",
unsigned int, (edge e),
default_max_noce_ifcvt_seq_cost)
+/* Return true if the given instruction sequence is a good candidate
+ as a replacement for the if-convertible sequence. */
+DEFHOOK
+(noce_conversion_profitable_p,
+ "This hook returns true if the instruction sequence @code{seq} is a good\n\
+candidate as a replacement for the if-convertible sequence described in\n\
+@code{if_info}.",
+bool, (rtx_insn *seq, struct noce_if_info *if_info),
+default_noce_conversion_profitable_p)
+
/* Permit speculative instructions in delay slots during delayed-branch
scheduling. */
DEFHOOK
diff --git a/gcc/target.h b/gcc/target.h
index 63390db..393de40 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -140,6 +140,9 @@ struct ddg;
/* This is defined in cfgloop.h . */
struct loop;
+/* This is defined in ifcvt.h. */
+struct noce_if_info;
+
/* This is defined in tree-ssa-alias.h. */
struct ao_ref;
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index a5565f5..18070df 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -257,6 +257,8 @@ extern void default_setup_incoming_vararg_bounds (cumulative_args_t ca ATTRIBUTE
extern bool default_optab_supported_p (int, machine_mode, machine_mode,
optimization_type);
extern unsigned int default_max_noce_ifcvt_seq_cost (edge);
+extern bool default_noce_conversion_profitable_p (rtx_insn *,
+ struct noce_if_info *);
extern unsigned int default_min_arithmetic_precision (void);
extern enum flt_eval_method
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9c1a764..6d1ed22 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2017-04-04 Jakub Jelinek <jakub@redhat.com>
+
+ PR tree-optimization/79390
+ * gcc.target/i386/pr79390.c: New test.
+ * gcc.dg/ifcvt-4.c: Use -mtune-ctrl=^one_if_conv_insn for i?86/x86_64.
+
2017-04-04 Volker Reichelt <v.reichelt@netcologne.de>
PR c++/80296
diff --git a/gcc/testsuite/gcc.dg/ifcvt-4.c b/gcc/testsuite/gcc.dg/ifcvt-4.c
index b4a4bc8..87bae3c 100644
--- a/gcc/testsuite/gcc.dg/ifcvt-4.c
+++ b/gcc/testsuite/gcc.dg/ifcvt-4.c
@@ -1,6 +1,7 @@
/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-insns=3 --param max-rtl-if-conversion-unpredictable-cost=100" } */
/* { dg-additional-options "-misel" { target { powerpc*-*-* } } } */
/* { dg-additional-options "-march=z196" { target { s390x-*-* } } } */
+/* { dg-additional-options "-mtune-ctrl=^one_if_conv_insn" { target { i?86-*-* x86_64-*-* } } } */
/* { dg-skip-if "Multiple set if-conversion not guaranteed on all subtargets" { "arm*-*-* hppa*64*-*-* s390-*-* visium-*-*" riscv*-*-* } } */
/* { dg-skip-if "" { "s390x-*-*" } { "-m31" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr79390.c b/gcc/testsuite/gcc.target/i386/pr79390.c
new file mode 100644
index 0000000..1c7d601
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr79390.c
@@ -0,0 +1,28 @@
+/* PR tree-optimization/79390 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -march=haswell -mtune=haswell -mfpmath=sse" } */
+/* Make sure we emit a conditional move in this loop. */
+
+extern double A[32];
+
+int
+foo (void)
+{
+ double t = A[0];
+ int jp = 0;
+ int i;
+
+ for (i = 0; i < 32; i++)
+ {
+ double ab = A[i];
+ if (ab > t)
+ {
+ jp = i;
+ t = ab;
+ }
+ }
+
+ return jp;
+}
+
+/* { dg-final { scan-assembler "\[ \\t\]cmov\[a-z]+\[ \\t\]" } } */