diff options
author | Evandro Menezes <e.menezes@samsung.com> | 2016-02-26 23:41:53 +0000 |
---|---|---|
committer | Evandro Menezes <evandro@gcc.gnu.org> | 2016-02-26 23:41:53 +0000 |
commit | 0c30e0f3160716430de36f359cd120aa875dcb82 (patch) | |
tree | fd190d707411a2a565e5dc493446708ec6eccf62 /gcc | |
parent | 20ba5f3318378eb29b15674e2ef37c6c26a1d465 (diff) | |
download | gcc-0c30e0f3160716430de36f359cd120aa875dcb82.zip gcc-0c30e0f3160716430de36f359cd120aa875dcb82.tar.gz gcc-0c30e0f3160716430de36f359cd120aa875dcb82.tar.bz2 |
Rename the tuning option and related functions that enable the Newton series for the reciprocal square root, so that their names reflect its approximate nature.
gcc/
* config/aarch64/aarch64-protos.h (aarch64_emit_swrsqrt): Rename
function to "aarch64_emit_approx_rsqrt".
* config/aarch64/aarch64-tuning-flags.def: Rename tuning flag to
AARCH64_EXTRA_TUNE_APPROX_RSQRT.
* config/aarch64/aarch64.c (exynosm1_tunings): Use new flag name.
(xgene1_tunings): Likewise.
(use_rsqrt_p): Likewise.
(aarch64_emit_swrsqrt): Use new function name.
* config/aarch64/aarch64-simd.md (aarch64_rsqrts_*): Likewise.
* config/aarch64/aarch64.opt (mlow-precision-recip-sqrt): Reword the
text explaining this option.
* doc/invoke.texi (-mlow-precision-recip-sqrt): Likewise.
From-SVN: r233772
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 20 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-protos.h | 5 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-tuning-flags.def | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 23 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.opt | 4 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 10 |
7 files changed, 41 insertions, 25 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 471fea5..8cece6b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2016-02-26 Evandro Menezes <e.menezes@samsung.com> + + Rename the AArch64 tuning option and related functions to enable the + Newton series for the reciprocal square root to reflect its + approximative characteristic. + + gcc/ + * config/aarch64/aarch64-protos.h (aarch64_emit_swrsqrt): Rename + function to "aarch64_emit_approx_rsqrt". + * config/aarch64/aarch64-tuning-flags.def: Rename tuning flag to + AARCH64_EXTRA_TUNE_APPROX_RSQRT. + * config/aarch64/aarch64.c (exynosm1_tunigs): Use new flag name. + (xgene1_tunings): Likewise. + (use_rsqrt_p): Likewise. + (aarch64_emit_swrsqrt): Use new function name. + * config/aarch64/aarch64-simd.md (aarch64_rsqrts_*): Likewise. + * config/aarch64/aarch64.opt (mlow-precision-recip-sqrt): Reword the + text explaining this option. + * doc/invoke.texi (-mlow-precision-recip-sqrt): Likewise. + 2016-02-26 Jakub Jelinek <jakub@redhat.com> PR target/69969 diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 78870e2..acf2062 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -360,8 +360,7 @@ void aarch64_emit_call_insn (rtx); void aarch64_register_pragmas (void); void aarch64_relayout_simd_types (void); void aarch64_reset_previous_fndecl (void); - -void aarch64_emit_swrsqrt (rtx, rtx); +void aarch64_emit_approx_rsqrt (rtx, rtx); /* Initialize builtins for SIMD intrinsics. 
*/ void init_aarch64_simd_builtins (void); @@ -413,9 +412,7 @@ rtx aarch64_expand_builtin (tree exp, machine_mode mode ATTRIBUTE_UNUSED, int ignore ATTRIBUTE_UNUSED); tree aarch64_builtin_decl (unsigned, bool ATTRIBUTE_UNUSED); - tree aarch64_builtin_rsqrt (unsigned int); - tree aarch64_builtin_vectorized_function (unsigned int, tree, tree); extern void aarch64_split_combinev16qi (rtx operands[3]); diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index d8497ab..bd73bce 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -405,7 +405,7 @@ UNSPEC_RSQRT))] "TARGET_SIMD" { - aarch64_emit_swrsqrt (operands[0], operands[1]); + aarch64_emit_approx_rsqrt (operands[0], operands[1]); DONE; }) diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def index 8036cfe..7e45a0c 100644 --- a/gcc/config/aarch64/aarch64-tuning-flags.def +++ b/gcc/config/aarch64/aarch64-tuning-flags.def @@ -29,5 +29,5 @@ AARCH64_TUNE_ to give an enum name. */ AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS) -AARCH64_EXTRA_TUNING_OPTION ("recip_sqrt", RECIP_SQRT) +AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 3519c7b..801f95a 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -538,7 +538,7 @@ static const struct tune_params exynosm1_tunings = 48, /* max_case_values. */ 64, /* cache_line_size. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags. */ + (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */ }; static const struct tune_params thunderx_tunings = @@ -586,7 +586,7 @@ static const struct tune_params xgene1_tunings = 0, /* max_case_values. */ 0, /* cache_line_size. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags. 
*/ + (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */ }; /* Support for fine-grained override of the tuning structures. */ @@ -7460,8 +7460,8 @@ aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, return aarch64_tune_params.memmov_cost; } -/* Return true if it is safe and beneficial to use the rsqrt optabs to - optimize 1.0/sqrt. */ +/* Return true if it is safe and beneficial to use the approximate rsqrt optabs + to optimize 1.0/sqrt. */ static bool use_rsqrt_p (void) @@ -7469,12 +7469,12 @@ use_rsqrt_p (void) return (!flag_trapping_math && flag_unsafe_math_optimizations && ((aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_RECIP_SQRT) + & AARCH64_EXTRA_TUNE_APPROX_RSQRT) || flag_mrecip_low_precision_sqrt)); } -/* Function to decide when to use - reciprocal square root builtins. */ +/* Function to decide when to use the approximate reciprocal square root + builtin. */ static tree aarch64_builtin_reciprocal (tree fndecl) @@ -7522,12 +7522,12 @@ get_rsqrts_type (machine_mode mode) } } -/* Emit instruction sequence to compute - reciprocal square root. Use two Newton-Raphson steps - for single precision and three for double precision. */ +/* Emit instruction sequence to compute the reciprocal square root using the + Newton-Raphson series. Iterate over the series twice for SF + and thrice for DF. */ void -aarch64_emit_swrsqrt (rtx dst, rtx src) +aarch64_emit_approx_rsqrt (rtx dst, rtx src) { machine_mode mode = GET_MODE (src); gcc_assert ( @@ -7544,6 +7544,7 @@ aarch64_emit_swrsqrt (rtx dst, rtx src) int iterations = double_mode ? 3 : 2; + /* Optionally iterate over the series one less time than otherwise. */ if (flag_mrecip_low_precision_sqrt) iterations--; diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index 5cbd4cd..49ef0c6 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -151,5 +151,5 @@ PC relative literal loads. 
mlow-precision-recip-sqrt Common Var(flag_mrecip_low_precision_sqrt) Optimization -When calculating a sqrt approximation, run fewer steps. -This reduces precision, but can result in faster computation. +When calculating the reciprocal square root approximation, +uses one less step than otherwise, thus reducing latency and precision. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 18b2b8f..4b5df0b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -12884,12 +12884,10 @@ corresponding flag to the linker. @item -mno-low-precision-recip-sqrt @opindex -mlow-precision-recip-sqrt @opindex -mno-low-precision-recip-sqrt -The square root estimate uses two steps instead of three for double-precision, -and one step instead of two for single-precision. -Thus reducing latency and precision. -This is only relevant if @option{-ffast-math} activates -reciprocal square root estimate instructions. -Which in turn depends on the target processor. +When calculating the reciprocal square root approximation, +uses one less step than otherwise, thus reducing latency and precision. +This is only relevant if @option{-ffast-math} enables the reciprocal square root +approximation, which in turn depends on the target processor. @item -march=@var{name} @opindex march |