aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorEvandro Menezes <e.menezes@samsung.com>2016-02-26 23:41:53 +0000
committerEvandro Menezes <evandro@gcc.gnu.org>2016-02-26 23:41:53 +0000
commit0c30e0f3160716430de36f359cd120aa875dcb82 (patch)
treefd190d707411a2a565e5dc493446708ec6eccf62 /gcc
parent20ba5f3318378eb29b15674e2ef37c6c26a1d465 (diff)
downloadgcc-0c30e0f3160716430de36f359cd120aa875dcb82.zip
gcc-0c30e0f3160716430de36f359cd120aa875dcb82.tar.gz
gcc-0c30e0f3160716430de36f359cd120aa875dcb82.tar.bz2
Rename the tuning option and related functions to enable the Newton series for the reciprocal square root to reflect its approximative characteristic.
gcc/ * config/aarch64/aarch64-protos.h (aarch64_emit_swrsqrt): Rename function to "aarch64_emit_approx_rsqrt". * config/aarch64/aarch64-tuning-flags.def: Rename tuning flag to AARCH64_EXTRA_TUNE_APPROX_RSQRT. * config/aarch64/aarch64.c (exynosm1_tunings): Use new flag name. (xgene1_tunings): Likewise. (use_rsqrt_p): Likewise. (aarch64_emit_swrsqrt): Use new function name. * config/aarch64/aarch64-simd.md (aarch64_rsqrts_*): Likewise. * config/aarch64/aarch64.opt (mlow-precision-recip-sqrt): Reword the text explaining this option. * doc/invoke.texi (-mlow-precision-recip-sqrt): Likewise. From-SVN: r233772
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog20
-rw-r--r--gcc/config/aarch64/aarch64-protos.h5
-rw-r--r--gcc/config/aarch64/aarch64-simd.md2
-rw-r--r--gcc/config/aarch64/aarch64-tuning-flags.def2
-rw-r--r--gcc/config/aarch64/aarch64.c23
-rw-r--r--gcc/config/aarch64/aarch64.opt4
-rw-r--r--gcc/doc/invoke.texi10
7 files changed, 41 insertions, 25 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 471fea5..8cece6b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2016-02-26 Evandro Menezes <e.menezes@samsung.com>
+
+ Rename the AArch64 tuning option and related functions to enable the
+ Newton series for the reciprocal square root to reflect its
+ approximative characteristic.
+
+ gcc/
+ * config/aarch64/aarch64-protos.h (aarch64_emit_swrsqrt): Rename
+ function to "aarch64_emit_approx_rsqrt".
+ * config/aarch64/aarch64-tuning-flags.def: Rename tuning flag to
+ AARCH64_EXTRA_TUNE_APPROX_RSQRT.
+ * config/aarch64/aarch64.c (exynosm1_tunings): Use new flag name.
+ (xgene1_tunings): Likewise.
+ (use_rsqrt_p): Likewise.
+ (aarch64_emit_swrsqrt): Use new function name.
+ * config/aarch64/aarch64-simd.md (aarch64_rsqrts_*): Likewise.
+ * config/aarch64/aarch64.opt (mlow-precision-recip-sqrt): Reword the
+ text explaining this option.
+ * doc/invoke.texi (-mlow-precision-recip-sqrt): Likewise.
+
2016-02-26 Jakub Jelinek <jakub@redhat.com>
PR target/69969
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 78870e2..acf2062 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -360,8 +360,7 @@ void aarch64_emit_call_insn (rtx);
void aarch64_register_pragmas (void);
void aarch64_relayout_simd_types (void);
void aarch64_reset_previous_fndecl (void);
-
-void aarch64_emit_swrsqrt (rtx, rtx);
+void aarch64_emit_approx_rsqrt (rtx, rtx);
/* Initialize builtins for SIMD intrinsics. */
void init_aarch64_simd_builtins (void);
@@ -413,9 +412,7 @@ rtx aarch64_expand_builtin (tree exp,
machine_mode mode ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED);
tree aarch64_builtin_decl (unsigned, bool ATTRIBUTE_UNUSED);
-
tree aarch64_builtin_rsqrt (unsigned int);
-
tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
extern void aarch64_split_combinev16qi (rtx operands[3]);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index d8497ab..bd73bce 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -405,7 +405,7 @@
UNSPEC_RSQRT))]
"TARGET_SIMD"
{
- aarch64_emit_swrsqrt (operands[0], operands[1]);
+ aarch64_emit_approx_rsqrt (operands[0], operands[1]);
DONE;
})
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index 8036cfe..7e45a0c 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -29,5 +29,5 @@
AARCH64_TUNE_ to give an enum name. */
AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
-AARCH64_EXTRA_TUNING_OPTION ("recip_sqrt", RECIP_SQRT)
+AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3519c7b..801f95a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -538,7 +538,7 @@ static const struct tune_params exynosm1_tunings =
48, /* max_case_values. */
64, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */
};
static const struct tune_params thunderx_tunings =
@@ -586,7 +586,7 @@ static const struct tune_params xgene1_tunings =
0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */
};
/* Support for fine-grained override of the tuning structures. */
@@ -7460,8 +7460,8 @@ aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
return aarch64_tune_params.memmov_cost;
}
-/* Return true if it is safe and beneficial to use the rsqrt optabs to
- optimize 1.0/sqrt. */
+/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
+ to optimize 1.0/sqrt. */
static bool
use_rsqrt_p (void)
@@ -7469,12 +7469,12 @@ use_rsqrt_p (void)
return (!flag_trapping_math
&& flag_unsafe_math_optimizations
&& ((aarch64_tune_params.extra_tuning_flags
- & AARCH64_EXTRA_TUNE_RECIP_SQRT)
+ & AARCH64_EXTRA_TUNE_APPROX_RSQRT)
|| flag_mrecip_low_precision_sqrt));
}
-/* Function to decide when to use
- reciprocal square root builtins. */
+/* Function to decide when to use the approximate reciprocal square root
+ builtin. */
static tree
aarch64_builtin_reciprocal (tree fndecl)
@@ -7522,12 +7522,12 @@ get_rsqrts_type (machine_mode mode)
}
}
-/* Emit instruction sequence to compute
- reciprocal square root. Use two Newton-Raphson steps
- for single precision and three for double precision. */
+/* Emit instruction sequence to compute the reciprocal square root using the
+ Newton-Raphson series. Iterate over the series twice for SF
+ and thrice for DF. */
void
-aarch64_emit_swrsqrt (rtx dst, rtx src)
+aarch64_emit_approx_rsqrt (rtx dst, rtx src)
{
machine_mode mode = GET_MODE (src);
gcc_assert (
@@ -7544,6 +7544,7 @@ aarch64_emit_swrsqrt (rtx dst, rtx src)
int iterations = double_mode ? 3 : 2;
+ /* Optionally iterate over the series one less time than otherwise. */
if (flag_mrecip_low_precision_sqrt)
iterations--;
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 5cbd4cd..49ef0c6 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -151,5 +151,5 @@ PC relative literal loads.
mlow-precision-recip-sqrt
Common Var(flag_mrecip_low_precision_sqrt) Optimization
-When calculating a sqrt approximation, run fewer steps.
-This reduces precision, but can result in faster computation.
+When calculating the reciprocal square root approximation,
+use one less step than otherwise, thus reducing latency and precision.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 18b2b8f..4b5df0b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12884,12 +12884,10 @@ corresponding flag to the linker.
@item -mno-low-precision-recip-sqrt
@opindex -mlow-precision-recip-sqrt
@opindex -mno-low-precision-recip-sqrt
-The square root estimate uses two steps instead of three for double-precision,
-and one step instead of two for single-precision.
-Thus reducing latency and precision.
-This is only relevant if @option{-ffast-math} activates
-reciprocal square root estimate instructions.
-Which in turn depends on the target processor.
+When calculating the reciprocal square root approximation,
+use one less step than otherwise, thus reducing latency and precision.
+This is only relevant if @option{-ffast-math} enables the reciprocal square root
+approximation, which in turn depends on the target processor.
@item -march=@var{name}
@opindex march