about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- gcc/ChangeLog 40
-rw-r--r-- gcc/config/aarch64/aarch64-builtins.c 34
-rw-r--r-- gcc/config/aarch64/aarch64-protos.h 2
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md 2
-rw-r--r-- gcc/config/aarch64/aarch64.c 49
-rw-r--r-- gcc/config/i386/i386.c 79
-rw-r--r-- gcc/config/rs6000/rs6000.c 95
-rw-r--r-- gcc/doc/md.texi 12
-rw-r--r-- gcc/doc/tm.texi 6
-rw-r--r-- gcc/internal-fn.def 2
-rw-r--r-- gcc/optabs.def 1
-rw-r--r-- gcc/target.def 8
-rw-r--r-- gcc/targhooks.c 2
-rw-r--r-- gcc/targhooks.h 2
-rw-r--r-- gcc/tree-ssa-math-opts.c 79
15 files changed, 228 insertions, 185 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6e9c823..b2e43ff 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,43 @@
+2015-12-03 Richard Sandiford <richard.sandiford@arm.com>
+
+ * internal-fn.def (RSQRT): New function.
+ * optabs.def (rsqrt_optab): New optab.
+ * doc/md.texi (rsqrtM2): Document.
+ * target.def (builtin_reciprocal): Replace gcall argument with
+ a function decl. Restrict hook to machine functions.
+ * doc/tm.texi: Regenerate.
+ * targhooks.h (default_builtin_reciprocal): Update prototype.
+ * targhooks.c (default_builtin_reciprocal): Likewise.
+ * tree-ssa-math-opts.c: Include internal-fn.h.
+ (internal_fn_reciprocal): New function.
+ (pass_cse_reciprocals::execute): Call it, and build a call to an
+ internal function on success. Only call targetm.builtin_reciprocal
+ for machine functions.
+ * config/aarch64/aarch64-protos.h (aarch64_builtin_rsqrt): Remove
+ second argument.
+ * config/aarch64/aarch64-builtins.c (aarch64_expand_builtin_rsqrt):
+ Rename aarch64_rsqrt_<mode>2 to rsqrt<mode>2.
+ (aarch64_builtin_rsqrt): Remove md_fn argument and only handle
+ machine functions.
+ * config/aarch64/aarch64.c (use_rsqrt_p): New function.
+ (aarch64_builtin_reciprocal): Replace gcall argument with a
+ function decl. Use use_rsqrt_p. Remove optimize_size check.
+ Only handle machine functions. Update call to aarch64_builtin_rsqrt.
+ (aarch64_optab_supported_p): New function.
+ (TARGET_OPTAB_SUPPORTED_P): Define.
+ * config/aarch64/aarch64-simd.md (aarch64_rsqrt_<mode>2): Rename to...
+ (rsqrt<mode>2): ...this.
+ * config/i386/i386.c (use_rsqrt_p): New function.
+ (ix86_builtin_reciprocal): Replace gcall argument with a
+ function decl. Use use_rsqrt_p. Remove optimize_insn_for_size_p
+ check. Only handle machine functions.
+ (ix86_optab_supported_p): Handle rsqrt_optab.
+ * config/rs6000/rs6000.c (TARGET_OPTAB_SUPPORTED_P): Define.
+ (rs6000_builtin_reciprocal): Replace gcall argument with a
+ function decl. Remove optimize_insn_for_size_p check.
+ Only handle machine functions.
+ (rs6000_optab_supported_p): New function.
+
2015-12-03 Bernd Schmidt <bschmidt@redhat.com>
PR target/68471
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 45011f6..b268a6a 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1176,19 +1176,19 @@ aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
switch (fcode)
{
case AARCH64_BUILTIN_RSQRT_DF:
- gen = gen_aarch64_rsqrt_df2;
+ gen = gen_rsqrtdf2;
break;
case AARCH64_BUILTIN_RSQRT_SF:
- gen = gen_aarch64_rsqrt_sf2;
+ gen = gen_rsqrtsf2;
break;
case AARCH64_BUILTIN_RSQRT_V2DF:
- gen = gen_aarch64_rsqrt_v2df2;
+ gen = gen_rsqrtv2df2;
break;
case AARCH64_BUILTIN_RSQRT_V2SF:
- gen = gen_aarch64_rsqrt_v2sf2;
+ gen = gen_rsqrtv2sf2;
break;
case AARCH64_BUILTIN_RSQRT_V4SF:
- gen = gen_aarch64_rsqrt_v4sf2;
+ gen = gen_rsqrtv4sf2;
break;
default: gcc_unreachable ();
}
@@ -1405,24 +1405,14 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
/* Return builtin for reciprocal square root. */
tree
-aarch64_builtin_rsqrt (unsigned int fn, bool md_fn)
+aarch64_builtin_rsqrt (unsigned int fn)
{
- if (md_fn)
- {
- if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
- return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
- if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
- return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
- if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf)
- return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];
- }
- else
- {
- if (fn == BUILT_IN_SQRT)
- return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_DF];
- if (fn == BUILT_IN_SQRTF)
- return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_SF];
- }
+ if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
+ return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
+ if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
+ return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
+ if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf)
+ return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];
return NULL_TREE;
}
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e0a050c..e6bfe06 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -407,7 +407,7 @@ rtx aarch64_expand_builtin (tree exp,
int ignore ATTRIBUTE_UNUSED);
tree aarch64_builtin_decl (unsigned, bool ATTRIBUTE_UNUSED);
-tree aarch64_builtin_rsqrt (unsigned int, bool);
+tree aarch64_builtin_rsqrt (unsigned int);
tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ae1075c..030a101 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -399,7 +399,7 @@
"frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_fp_rsqrts_<Vetype><q>")])
-(define_expand "aarch64_rsqrt_<mode>2"
+(define_expand "rsqrt<mode>2"
[(set (match_operand:VALLF 0 "register_operand" "=w")
(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
UNSPEC_RSQRT))]
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 88dbe15..88f3ef8 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7099,26 +7099,27 @@ aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
return aarch64_tune_params.memmov_cost;
}
+/* Return true if it is safe and beneficial to use the rsqrt optabs to
+ optimize 1.0/sqrt. */
+
+static bool
+use_rsqrt_p (void)
+{
+ return (!flag_trapping_math
+ && flag_unsafe_math_optimizations
+ && (aarch64_tune_params.extra_tuning_flags
+ & AARCH64_EXTRA_TUNE_RECIP_SQRT));
+}
+
/* Function to decide when to use
reciprocal square root builtins. */
static tree
-aarch64_builtin_reciprocal (gcall *call)
+aarch64_builtin_reciprocal (tree fndecl)
{
- if (flag_trapping_math
- || !flag_unsafe_math_optimizations
- || optimize_size
- || ! (aarch64_tune_params.extra_tuning_flags
- & AARCH64_EXTRA_TUNE_RECIP_SQRT))
+ if (!use_rsqrt_p ())
return NULL_TREE;
-
- if (gimple_call_internal_p (call))
- return NULL_TREE;
-
- tree fndecl = gimple_call_fndecl (call);
- enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
- bool md_fn = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
- return aarch64_builtin_rsqrt (fn, md_fn);
+ return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
}
typedef rtx (*rsqrte_type) (rtx, rtx);
@@ -13546,6 +13547,23 @@ aarch64_promoted_type (const_tree t)
return float_type_node;
return NULL_TREE;
}
+
+/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
+
+static bool
+aarch64_optab_supported_p (int op, machine_mode, machine_mode,
+ optimization_type opt_type)
+{
+ switch (op)
+ {
+ case rsqrt_optab:
+ return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
+
+ default:
+ return true;
+ }
+}
+
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost
@@ -13866,6 +13884,9 @@ aarch64_promoted_type (const_tree t)
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
+#undef TARGET_OPTAB_SUPPORTED_P
+#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-aarch64.h"
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 96d6c98..cd44375 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -42676,66 +42676,36 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
return ix86_builtins[code];
}
+/* Return true if it is safe to use the rsqrt optabs to optimize
+ 1.0/sqrt. */
+
+static bool
+use_rsqrt_p ()
+{
+ return (TARGET_SSE_MATH
+ && flag_finite_math_only
+ && !flag_trapping_math
+ && flag_unsafe_math_optimizations);
+}
+
/* Returns a code for a target-specific builtin that implements
reciprocal of the function, or NULL_TREE if not available. */
static tree
-ix86_builtin_reciprocal (gcall *call)
+ix86_builtin_reciprocal (tree fndecl)
{
- if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
- && flag_finite_math_only && !flag_trapping_math
- && flag_unsafe_math_optimizations))
- return NULL_TREE;
-
- if (gimple_call_internal_p (call))
- switch (gimple_call_internal_fn (call))
- {
- tree type;
- case IFN_SQRT:
- type = TREE_TYPE (gimple_call_lhs (call));
- switch (TYPE_MODE (type))
- {
- /* Vectorized version of sqrt to rsqrt conversion. */
- case V4SFmode:
- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
-
- case V8SFmode:
- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
-
- default:
- return NULL_TREE;
- }
-
- default:
- return NULL_TREE;
- }
-
- tree fndecl = gimple_call_fndecl (call);
- if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
- /* Machine dependent builtins. */
- switch (DECL_FUNCTION_CODE (fndecl))
- {
- /* Vectorized version of sqrt to rsqrt conversion. */
- case IX86_BUILTIN_SQRTPS_NR:
- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ /* Vectorized version of sqrt to rsqrt conversion. */
+ case IX86_BUILTIN_SQRTPS_NR:
+ return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
- case IX86_BUILTIN_SQRTPS_NR256:
- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
+ case IX86_BUILTIN_SQRTPS_NR256:
+ return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
- default:
- return NULL_TREE;
- }
- else
- /* Normal builtins. */
- switch (DECL_FUNCTION_CODE (fndecl))
- {
- /* Sqrt to rsqrt conversion. */
- case BUILT_IN_SQRTF:
- return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
-
- default:
- return NULL_TREE;
- }
+ default:
+ return NULL_TREE;
+ }
}
/* Helper for avx_vpermilps256_operand et al. This is also used by
@@ -54139,6 +54109,9 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
return true;
return opt_type == OPTIMIZE_FOR_SPEED;
+ case rsqrt_optab:
+ return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
+
default:
return true;
}
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 22eb0e5..2b2c170 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1722,6 +1722,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
+
+#undef TARGET_OPTAB_SUPPORTED_P
+#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
/* Processor table. */
@@ -32713,77 +32716,25 @@ rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
reciprocal of the function, or NULL_TREE if not available. */
static tree
-rs6000_builtin_reciprocal (gcall *call)
+rs6000_builtin_reciprocal (tree fndecl)
{
- if (optimize_insn_for_size_p ())
- return NULL_TREE;
-
- if (gimple_call_internal_p (call))
- switch (gimple_call_internal_fn (call))
- {
- tree type;
- case IFN_SQRT:
- type = TREE_TYPE (gimple_call_lhs (call));
- switch (TYPE_MODE (type))
- {
- case V2DFmode:
- if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
- return NULL_TREE;
-
- return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
-
- case V4SFmode:
- if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
- return NULL_TREE;
-
- return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
-
- default:
- return NULL_TREE;
- }
-
- default:
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ case VSX_BUILTIN_XVSQRTDP:
+ if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
return NULL_TREE;
- }
-
- tree fndecl = gimple_call_fndecl (call);
- if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
- switch (DECL_FUNCTION_CODE (fndecl))
- {
- case VSX_BUILTIN_XVSQRTDP:
- if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
- return NULL_TREE;
-
- return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
- case VSX_BUILTIN_XVSQRTSP:
- if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
- return NULL_TREE;
+ return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
- return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
-
- default:
+ case VSX_BUILTIN_XVSQRTSP:
+ if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
return NULL_TREE;
- }
-
- else
- switch (DECL_FUNCTION_CODE (fndecl))
- {
- case BUILT_IN_SQRT:
- if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
- return NULL_TREE;
-
- return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
-
- case BUILT_IN_SQRTF:
- if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
- return NULL_TREE;
- return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
+ return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
- default:
- return NULL_TREE;
- }
+ default:
+ return NULL_TREE;
+ }
}
/* Load up a constant. If the mode is a vector mode, splat the value across
@@ -38007,6 +37958,22 @@ rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
*update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
+/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
+
+static bool
+rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
+ optimization_type opt_type)
+{
+ switch (op)
+ {
+ case rsqrt_optab:
+ return (opt_type == OPTIMIZE_FOR_SPEED
+ && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
+
+ default:
+ return true;
+ }
+}
struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 8e3f8f5..dcb3ee0 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5331,6 +5331,18 @@ corresponds to the C data type @code{double} and the @code{sqrtf}
built-in function uses the mode which corresponds to the C data
type @code{float}.
+@cindex @code{rsqrt@var{m}2} instruction pattern
+@item @samp{rsqrt@var{m}2}
+Store the reciprocal of the square root of operand 1 into operand 0.
+On most architectures this pattern is only approximate, so either
+its C condition or the @code{TARGET_OPTAB_SUPPORTED_P} hook should
+check for the appropriate math flags. (Using the C condition is
+more direct, but using @code{TARGET_OPTAB_SUPPORTED_P} can be useful
+if a target-specific built-in also uses the @samp{rsqrt@var{m}2}
+pattern.)
+
+This pattern is not allowed to @code{FAIL}.
+
@cindex @code{fmod@var{m}3} instruction pattern
@item @samp{fmod@var{m}3}
Store the remainder of dividing operand 1 by operand 2 into
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index a84ad57..1ce7181 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5608,9 +5608,9 @@ be placed in an @code{object_block} structure.
The default version returns true for all decls.
@end deftypefn
-@deftypefn {Target Hook} tree TARGET_BUILTIN_RECIPROCAL (gcall *@var{call})
-This hook should return the DECL of a function that implements reciprocal of
-the builtin or internal function call @var{call}, or
+@deftypefn {Target Hook} tree TARGET_BUILTIN_RECIPROCAL (tree @var{fndecl})
+This hook should return the DECL of a function that implements the
+reciprocal of the machine-specific builtin function @var{fndecl}, or
@code{NULL_TREE} if such a function is not available.
@end deftypefn
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index dee9332..a9118b3 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -91,6 +91,8 @@ DEF_INTERNAL_OPTAB_FN (LOAD_LANES, ECF_CONST, vec_load_lanes, load_lanes)
DEF_INTERNAL_OPTAB_FN (MASK_STORE, 0, maskstore, mask_store)
DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
+DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
+
/* Unary math functions. */
DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary)
DEF_INTERNAL_FLT_FN (ASIN, ECF_CONST, asin, unary)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 8feb394..a19466e 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -267,6 +267,7 @@ OPTAB_D (log_optab, "log$a2")
OPTAB_D (logb_optab, "logb$a2")
OPTAB_D (pow_optab, "pow$a3")
OPTAB_D (remainder_optab, "remainder$a3")
+OPTAB_D (rsqrt_optab, "rsqrt$a2")
OPTAB_D (scalb_optab, "scalb$a3")
OPTAB_D (signbit_optab, "signbit$F$a2")
OPTAB_D (significand_optab, "significand$a2")
diff --git a/gcc/target.def b/gcc/target.def
index 1971892..d754337 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2459,13 +2459,13 @@ identical versions.",
tree, (void *decl), NULL)
/* Returns a code for a target-specific builtin that implements
- reciprocal of the function, or NULL_TREE if not available. */
+ reciprocal of a target-specific function, or NULL_TREE if not available. */
DEFHOOK
(builtin_reciprocal,
- "This hook should return the DECL of a function that implements reciprocal of\n\
-the builtin or internal function call @var{call}, or\n\
+ "This hook should return the DECL of a function that implements the\n\
+reciprocal of the machine-specific builtin function @var{fndecl}, or\n\
@code{NULL_TREE} if such a function is not available.",
- tree, (gcall *call),
+ tree, (tree fndecl),
default_builtin_reciprocal)
/* For a vendor-specific TYPE, return a pointer to a statically-allocated
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 7045609..dcf0863 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -600,7 +600,7 @@ default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
/* Reciprocal. */
tree
-default_builtin_reciprocal (gcall *)
+default_builtin_reciprocal (tree)
{
return NULL_TREE;
}
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 281b5fe..47b5cfc 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -90,7 +90,7 @@ extern tree default_builtin_vectorized_conversion (unsigned int, tree, tree);
extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
-extern tree default_builtin_reciprocal (gcall *);
+extern tree default_builtin_reciprocal (tree);
extern HOST_WIDE_INT default_vector_alignment (const_tree);
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 66d7501..b00f046 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -110,6 +110,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-ssa.h"
#include "builtins.h"
#include "params.h"
+#include "internal-fn.h"
#include "case-cfn-macros.h"
/* This structure represents one basic block that either computes a
@@ -497,6 +498,31 @@ execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
occ_head = NULL;
}
+/* Return an internal function that implements the reciprocal of CALL,
+ or IFN_LAST if there is no such function that the target supports. */
+
+internal_fn
+internal_fn_reciprocal (gcall *call)
+{
+ internal_fn ifn;
+
+ switch (gimple_call_combined_fn (call))
+ {
+ CASE_CFN_SQRT:
+ ifn = IFN_RSQRT;
+ break;
+
+ default:
+ return IFN_LAST;
+ }
+
+ tree_pair types = direct_internal_fn_types (ifn, call);
+ if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
+ return IFN_LAST;
+
+ return ifn;
+}
+
/* Go through all the floating-point SSA_NAMEs, and call
execute_cse_reciprocals_1 on each of them. */
namespace {
@@ -586,7 +612,6 @@ pass_cse_reciprocals::execute (function *fun)
gsi_next (&gsi))
{
gimple *stmt = gsi_stmt (gsi);
- tree fndecl;
if (is_gimple_assign (stmt)
&& gimple_assign_rhs_code (stmt) == RDIV_EXPR)
@@ -600,20 +625,25 @@ pass_cse_reciprocals::execute (function *fun)
stmt1 = SSA_NAME_DEF_STMT (arg1);
if (is_gimple_call (stmt1)
- && gimple_call_lhs (stmt1)
- && (gimple_call_internal_p (stmt1)
- || ((fndecl = gimple_call_fndecl (stmt1))
- && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
- || (DECL_BUILT_IN_CLASS (fndecl)
- == BUILT_IN_MD)))))
+ && gimple_call_lhs (stmt1))
{
bool fail;
imm_use_iterator ui;
use_operand_p use_p;
+ tree fndecl = NULL_TREE;
- fndecl = targetm.builtin_reciprocal (as_a <gcall *> (stmt1));
- if (!fndecl)
- continue;
+ gcall *call = as_a <gcall *> (stmt1);
+ internal_fn ifn = internal_fn_reciprocal (call);
+ if (ifn == IFN_LAST)
+ {
+ fndecl = gimple_call_fndecl (call);
+ if (!fndecl
+ || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD)
+ continue;
+ fndecl = targetm.builtin_reciprocal (fndecl);
+ if (!fndecl)
+ continue;
+ }
/* Check that all uses of the SSA name are divisions,
otherwise replacing the defining statement will do
@@ -636,28 +666,35 @@ pass_cse_reciprocals::execute (function *fun)
if (fail)
continue;
- gimple_replace_ssa_lhs (stmt1, arg1);
- if (gimple_call_internal_p (stmt1))
+ gimple_replace_ssa_lhs (call, arg1);
+ if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
{
auto_vec<tree, 4> args;
for (unsigned int i = 0;
- i < gimple_call_num_args (stmt1); i++)
- args.safe_push (gimple_call_arg (stmt1, i));
- gcall *stmt2 = gimple_build_call_vec (fndecl, args);
+ i < gimple_call_num_args (call); i++)
+ args.safe_push (gimple_call_arg (call, i));
+ gcall *stmt2;
+ if (ifn == IFN_LAST)
+ stmt2 = gimple_build_call_vec (fndecl, args);
+ else
+ stmt2 = gimple_build_call_internal_vec (ifn, args);
gimple_call_set_lhs (stmt2, arg1);
- if (gimple_vdef (stmt1))
+ if (gimple_vdef (call))
{
- gimple_set_vdef (stmt2, gimple_vdef (stmt1));
+ gimple_set_vdef (stmt2, gimple_vdef (call));
SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
}
- gimple_set_vuse (stmt2, gimple_vuse (stmt1));
- gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt1);
+ gimple_set_vuse (stmt2, gimple_vuse (call));
+ gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
gsi_replace (&gsi2, stmt2, true);
}
else
{
- gimple_call_set_fndecl (stmt1, fndecl);
- update_stmt (stmt1);
+ if (ifn == IFN_LAST)
+ gimple_call_set_fndecl (call, fndecl);
+ else
+ gimple_call_set_internal_fn (call, ifn);
+ update_stmt (call);
}
reciprocal_stats.rfuncs_inserted++;