diff options
author | Steve Ellcey <sje@cup.hp.com> | 2009-05-19 18:18:08 +0000 |
---|---|---|
committer | Steve Ellcey <sje@gcc.gnu.org> | 2009-05-19 18:18:08 +0000 |
commit | f3a83111278d2fe8a60e8fec8b1fbbeee69d6d31 (patch) | |
tree | 09f6fa1806cdc0177422ec6b1a176bb043076c3a /gcc/config/ia64 | |
parent | 1ffc7157ef3f33a65c7acbf23b204030b21d82fe (diff) | |
download | gcc-f3a83111278d2fe8a60e8fec8b1fbbeee69d6d31.zip gcc-f3a83111278d2fe8a60e8fec8b1fbbeee69d6d31.tar.gz gcc-f3a83111278d2fe8a60e8fec8b1fbbeee69d6d31.tar.bz2 |
ia64-protos.h (ia64_dconst_0_5): New.
* config/ia64/ia64-protos.h (ia64_dconst_0_5): New.
(ia64_dconst_0_375): New.
* config/ia64/ia64.c (ia64_override_options): Remove
-minline-sqrt-min-latency warning.
(ia64_dconst_0_5_rtx, ia64_dconst_0_5): New.
(ia64_dconst_0_375_rtx, ia64_dconst_0_375): New.
* config/ia64/ia64.md (*sqrt_approx): Remove.
(sqrtsf2): Remove #if 0.
(sqrtsf2_internal_thr): Rewrite and move to div.md.
(sqrtdf2): Remove assert.
(sqrtdf2_internal_thr): Rewrite and move to div.md.
(sqrtxf2): Remove #if 0.
(sqrtxf2_internal_thr): Rewrite and move to div.md.
* config/ia64/div.md (sqrt_approx_rf): New.
(sqrtsf2_internal_thr): New implementation.
(sqrtsf2_internal_lat): New.
(sqrtdf2_internal_thr): New implementation.
(sqrtxf2_internal): New implementation.
From-SVN: r147713
Diffstat (limited to 'gcc/config/ia64')
-rw-r--r-- | gcc/config/ia64/div.md | 278 | ||||
-rw-r--r-- | gcc/config/ia64/ia64-protos.h | 3 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.c | 35 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.md | 420 |
4 files changed, 315 insertions, 421 deletions
diff --git a/gcc/config/ia64/div.md b/gcc/config/ia64/div.md index 5f9d005..583e9e3 100644 --- a/gcc/config/ia64/div.md +++ b/gcc/config/ia64/div.md @@ -518,3 +518,281 @@ emit_insn (gen_truncrfxf2 (operands[0], q_res)); DONE; }) + + +;; SQRT operations + + +(define_insn "sqrt_approx_rf" + [(set (match_operand:RF 0 "fr_register_operand" "=f") + (unspec:RF [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")] + UNSPEC_FR_SQRT_RECIP_APPROX_RES)) + (set (match_operand:BI 2 "register_operand" "=c") + (unspec:BI [(match_dup 1)] UNSPEC_FR_SQRT_RECIP_APPROX)) + (use (match_operand:SI 3 "const_int_operand" ""))] + "" + "frsqrta.s%3 %0, %2 = %F1" + [(set_attr "itanium_class" "fmisc") + (set_attr "predicable" "no")]) + +(define_expand "sqrtsf2_internal_thr" + [(set (match_operand:SF 0 "fr_register_operand" "") + (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))] + "TARGET_INLINE_SQRT" +{ + rtx y = gen_reg_rtx (RFmode); + rtx b = gen_reg_rtx (RFmode); + rtx g = gen_reg_rtx (RFmode); + rtx e = gen_reg_rtx (RFmode); + rtx s = gen_reg_rtx (RFmode); + rtx f = gen_reg_rtx (RFmode); + rtx y1 = gen_reg_rtx (RFmode); + rtx g1 = gen_reg_rtx (RFmode); + rtx h = gen_reg_rtx (RFmode); + rtx d = gen_reg_rtx (RFmode); + rtx g2 = gen_reg_rtx (RFmode); + rtx cond = gen_reg_rtx (BImode); + rtx zero = CONST0_RTX (RFmode); + rtx one = CONST1_RTX (RFmode); + rtx c1 = ia64_dconst_0_5(); + rtx c2 = ia64_dconst_0_375(); + rtx reg_df_c1 = gen_reg_rtx (DFmode); + rtx reg_df_c2 = gen_reg_rtx (DFmode); + rtx reg_rf_c1 = gen_reg_rtx (RFmode); + rtx reg_rf_c2 = gen_reg_rtx (RFmode); + rtx status0 = CONST0_RTX (SImode); + rtx status1 = CONST1_RTX (SImode); + rtx trunc_sgl = CONST0_RTX (SImode); + rtx trunc_off = CONST2_RTX (SImode); + + /* Put needed constants into registers. 
*/ + emit_insn (gen_movdf (reg_df_c1, c1)); + emit_insn (gen_movdf (reg_df_c2, c2)); + emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1)); + emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2)); + /* Empty conversion to put input into RFmode. */ + emit_insn (gen_extendsfrf2 (b, operands[1])); + /* y = sqrt (1 / b) */ + emit_insn (gen_sqrt_approx_rf (y, b, cond, status0)); + /* g = b * y */ + emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off)); + /* e = 1 - (g * y) */ + emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off)); + /* s = 0.5 + (0.375 * e) */ + emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off)); + /* f = y * e */ + emit_insn (gen_mulrf3_cond (f, cond, y, e, zero, status1, trunc_off)); + /* y1 = y + (f * s) */ + emit_insn (gen_m2addrf4_cond (y1, cond, y, f, s, zero, status1, trunc_off)); + /* g1 = single (b * y1) */ + emit_insn (gen_mulrf3_cond (g1, cond, b, y1, zero, status1, trunc_sgl)); + /* h = 0.5 * y1 */ + emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y1, zero, status1, trunc_off)); + /* d = b - g1 * g1 */ + emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off)); + /* g2 = single(g1 + (d * h)) */ + emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h, y, status0, trunc_sgl)); + /* Conversion back into SFmode. 
*/ + emit_insn (gen_truncrfsf2 (operands[0], g2)); + DONE; +}) + +(define_expand "sqrtsf2_internal_lat" + [(set (match_operand:SF 0 "fr_register_operand" "") + (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))] + "TARGET_INLINE_SQRT" +{ + rtx y = gen_reg_rtx (RFmode); + rtx b = gen_reg_rtx (RFmode); + rtx g = gen_reg_rtx (RFmode); + rtx g1 = gen_reg_rtx (RFmode); + rtx g2 = gen_reg_rtx (RFmode); + rtx e = gen_reg_rtx (RFmode); + rtx s = gen_reg_rtx (RFmode); + rtx f = gen_reg_rtx (RFmode); + rtx f1 = gen_reg_rtx (RFmode); + rtx h = gen_reg_rtx (RFmode); + rtx h1 = gen_reg_rtx (RFmode); + rtx d = gen_reg_rtx (RFmode); + rtx cond = gen_reg_rtx (BImode); + rtx zero = CONST0_RTX (RFmode); + rtx one = CONST1_RTX (RFmode); + rtx c1 = ia64_dconst_0_5(); + rtx c2 = ia64_dconst_0_375(); + rtx reg_df_c1 = gen_reg_rtx (DFmode); + rtx reg_df_c2 = gen_reg_rtx (DFmode); + rtx reg_rf_c1 = gen_reg_rtx (RFmode); + rtx reg_rf_c2 = gen_reg_rtx (RFmode); + rtx status0 = CONST0_RTX (SImode); + rtx status1 = CONST1_RTX (SImode); + rtx trunc_sgl = CONST0_RTX (SImode); + rtx trunc_off = CONST2_RTX (SImode); + + /* Put needed constants into registers. */ + emit_insn (gen_movdf (reg_df_c1, c1)); + emit_insn (gen_movdf (reg_df_c2, c2)); + emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1)); + emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2)); + /* Empty conversion to put input into RFmode. 
*/ + emit_insn (gen_extendsfrf2 (b, operands[1])); + /* y = sqrt (1 / b) */ + emit_insn (gen_sqrt_approx_rf (y, b, cond, status0)); + /* g = b * y */ + emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off)); + /* e = 1 - (g * y) */ + emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off)); + /* h = 0.5 * y */ + emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off)); + /* s = 0.5 + (0.375 * e) */ + emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off)); + /* f = e * g */ + emit_insn (gen_mulrf3_cond (f, cond, e, g, zero, status1, trunc_off)); + /* g1 = single (g + (f * s)) */ + emit_insn (gen_m2addrf4_cond (g1, cond, g, f, s, zero, status1, trunc_sgl)); + /* f1 = e * h */ + emit_insn (gen_mulrf3_cond (f1, cond, e, h, zero, status1, trunc_off)); + /* d = b - g1 * g1 */ + emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off)); + /* h1 = h + (f1 * s) */ + emit_insn (gen_m2addrf4_cond (h1, cond, h, f1, s, zero, status1, trunc_off)); + /* g2 = single(g1 + (d * h1)) */ + emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h1, y, status0, trunc_sgl)); + /* Conversion back into SFmode. 
*/ + emit_insn (gen_truncrfsf2 (operands[0], g2)); + DONE; +}) + +(define_expand "sqrtdf2_internal_thr" + [(set (match_operand:DF 0 "fr_register_operand" "") + (sqrt:DF (match_operand:DF 1 "fr_register_operand" "")))] + "TARGET_INLINE_SQRT" +{ + rtx y = gen_reg_rtx (RFmode); + rtx b = gen_reg_rtx (RFmode); + rtx g = gen_reg_rtx (RFmode); + rtx g1 = gen_reg_rtx (RFmode); + rtx g2 = gen_reg_rtx (RFmode); + rtx g3 = gen_reg_rtx (RFmode); + rtx g4 = gen_reg_rtx (RFmode); + rtx r = gen_reg_rtx (RFmode); + rtx r1 = gen_reg_rtx (RFmode); + rtx h = gen_reg_rtx (RFmode); + rtx h1 = gen_reg_rtx (RFmode); + rtx h2 = gen_reg_rtx (RFmode); + rtx d = gen_reg_rtx (RFmode); + rtx d1 = gen_reg_rtx (RFmode); + rtx cond = gen_reg_rtx (BImode); + rtx zero = CONST0_RTX (RFmode); + rtx c1 = ia64_dconst_0_5(); + rtx reg_df_c1 = gen_reg_rtx (DFmode); + rtx reg_rf_c1 = gen_reg_rtx (RFmode); + rtx status0 = CONST0_RTX (SImode); + rtx status1 = CONST1_RTX (SImode); + rtx trunc_dbl = CONST1_RTX (SImode); + rtx trunc_off = CONST2_RTX (SImode); + + /* Put needed constants into registers. */ + emit_insn (gen_movdf (reg_df_c1, c1)); + emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1)); + /* Empty conversion to put input into RFmode. 
*/ + emit_insn (gen_extenddfrf2 (b, operands[1])); + /* y = sqrt (1 / b) */ + emit_insn (gen_sqrt_approx_rf (y, b, cond, status0)); + /* g = b * y */ + emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off)); + /* h = 0.5 * y */ + emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off)); + /* r = 0.5 - (g * h) */ + emit_insn (gen_m2subrf4_cond (r, cond, reg_rf_c1, g, h, zero, status1, trunc_off)); + /* g1 = g + (g * r) */ + emit_insn (gen_m2addrf4_cond (g1, cond, g, g, r, zero, status1, trunc_off)); + /* h1 = h + (h * r) */ + emit_insn (gen_m2addrf4_cond (h1, cond, h, h, r, zero, status1, trunc_off)); + /* r1 = 0.5 - (g1 * h1) */ + emit_insn (gen_m2subrf4_cond (r1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off)); + /* g2 = g1 + (g1 * r1) */ + emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, r1, zero, status1, trunc_off)); + /* h2 = h1 + (h1 * r1) */ + emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, r1, zero, status1, trunc_off)); + /* d = b - (g2 * g2) */ + emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off)); + /* g3 = g2 + (d * h2) */ + emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off)); + /* d1 = b - (g3 * g3) */ + emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off)); + /* g4 = g3 + (d1 * h2) */ + emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h2, y, status1, trunc_dbl)); + /* Conversion back into DFmode. 
*/ + emit_insn (gen_truncrfdf2 (operands[0], g4)); + DONE; +}) + +(define_expand "sqrtxf2_internal" + [(set (match_operand:XF 0 "fr_register_operand" "") + (sqrt:XF (match_operand:XF 1 "fr_register_operand" "")))] + "TARGET_INLINE_SQRT" +{ + rtx y = gen_reg_rtx (RFmode); + rtx b = gen_reg_rtx (RFmode); + rtx g = gen_reg_rtx (RFmode); + rtx g1 = gen_reg_rtx (RFmode); + rtx g2 = gen_reg_rtx (RFmode); + rtx g3 = gen_reg_rtx (RFmode); + rtx g4 = gen_reg_rtx (RFmode); + rtx e = gen_reg_rtx (RFmode); + rtx e1 = gen_reg_rtx (RFmode); + rtx e2 = gen_reg_rtx (RFmode); + rtx h = gen_reg_rtx (RFmode); + rtx h1 = gen_reg_rtx (RFmode); + rtx h2 = gen_reg_rtx (RFmode); + rtx h3 = gen_reg_rtx (RFmode); + rtx d = gen_reg_rtx (RFmode); + rtx d1 = gen_reg_rtx (RFmode); + rtx cond = gen_reg_rtx (BImode); + rtx zero = CONST0_RTX (RFmode); + rtx c1 = ia64_dconst_0_5(); + rtx reg_df_c1 = gen_reg_rtx (DFmode); + rtx reg_rf_c1 = gen_reg_rtx (RFmode); + rtx status0 = CONST0_RTX (SImode); + rtx status1 = CONST1_RTX (SImode); + rtx trunc_off = CONST2_RTX (SImode); + + /* Put needed constants into registers. */ + emit_insn (gen_movdf (reg_df_c1, c1)); + emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1)); + /* Empty conversion to put input into RFmode. 
*/ + emit_insn (gen_extendxfrf2 (b, operands[1])); + /* y = sqrt (1 / b) */ + emit_insn (gen_sqrt_approx_rf (y, b, cond, status0)); + /* g = b * y */ + emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off)); + /* h = 0.5 * y */ + emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off)); + /* e = 0.5 - (g * h) */ + emit_insn (gen_m2subrf4_cond (e, cond, reg_rf_c1, g, h, zero, status1, trunc_off)); + /* g1 = g + (g * e) */ + emit_insn (gen_m2addrf4_cond (g1, cond, g, g, e, zero, status1, trunc_off)); + /* h1 = h + (h * e) */ + emit_insn (gen_m2addrf4_cond (h1, cond, h, h, e, zero, status1, trunc_off)); + /* e1 = 0.5 - (g1 * h1) */ + emit_insn (gen_m2subrf4_cond (e1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off)); + /* g2 = g1 + (g1 * e1) */ + emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, e1, zero, status1, trunc_off)); + /* h2 = h1 + (h1 * e1) */ + emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, e1, zero, status1, trunc_off)); + /* d = b - (g2 * g2) */ + emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off)); + /* e2 = 0.5 - (g2 * h2) */ + emit_insn (gen_m2subrf4_cond (e2, cond, reg_rf_c1, g2, h2, zero, status1, trunc_off)); + /* g3 = g2 + (d * h2) */ + emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off)); + /* h3 = h2 + (e2 * h2) */ + emit_insn (gen_m2addrf4_cond (h3, cond, h2, e2, h2, zero, status1, trunc_off)); + /* d1 = b - (g3 * g3) */ + emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off)); + /* g4 = g3 + (d1 * h3) */ + emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h3, y, status1, trunc_off)); + /* Conversion back into XFmode. 
*/ + emit_insn (gen_truncrfxf2 (operands[0], g4)); + DONE; +}) diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h index 0859c7f..5a68854 100644 --- a/gcc/config/ia64/ia64-protos.h +++ b/gcc/config/ia64/ia64-protos.h @@ -102,3 +102,6 @@ extern void ia64_profile_hook (int); extern void ia64_optimization_options (int, int); extern void ia64_init_expanders (void); + +extern rtx ia64_dconst_0_5 (void); +extern rtx ia64_dconst_0_375 (void); diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index b44f530..f9b8460 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -5280,12 +5280,6 @@ ia64_override_options (void) if (TARGET_AUTO_PIC) target_flags |= MASK_CONST_GP; - if (TARGET_INLINE_SQRT == INL_MIN_LAT) - { - warning (0, "not yet implemented: latency-optimized inline square root"); - TARGET_INLINE_SQRT = INL_MAX_THR; - } - ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload; flag_schedule_insns_after_reload = 0; @@ -10571,4 +10565,33 @@ ia64_c_mode_for_suffix (char suffix) return VOIDmode; } +static GTY(()) rtx ia64_dconst_0_5_rtx; + +rtx +ia64_dconst_0_5 (void) +{ + if (! ia64_dconst_0_5_rtx) + { + REAL_VALUE_TYPE rv; + real_from_string (&rv, "0.5"); + ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode); + } + return ia64_dconst_0_5_rtx; +} + +static GTY(()) rtx ia64_dconst_0_375_rtx; + +rtx +ia64_dconst_0_375 (void) +{ + if (! ia64_dconst_0_375_rtx) + { + REAL_VALUE_TYPE rv; + real_from_string (&rv, "0.375"); + ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode); + } + return ia64_dconst_0_375_rtx; +} + + #include "gt-ia64.h" diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md index e5a6d81..2919772 100644 --- a/gcc/config/ia64/ia64.md +++ b/gcc/config/ia64/ia64.md @@ -3161,21 +3161,6 @@ DONE; }) -;; Inline square root. 
- -(define_insn "*sqrt_approx" - [(set (match_operand:XF 0 "fr_register_operand" "=f") - (div:XF (const_int 1) - (unspec:XF [(match_operand:XF 2 "fr_reg_or_fp01_operand" "fG")] - UNSPEC_FR_SQRT_RECIP_APPROX_RES))) - (set (match_operand:BI 1 "register_operand" "=c") - (unspec:BI [(match_dup 2)] UNSPEC_FR_SQRT_RECIP_APPROX)) - (use (match_operand:SI 3 "const_int_operand" "")) ] - "" - "frsqrta.s%3 %0, %1 = %2" - [(set_attr "itanium_class" "fmisc") - (set_attr "predicable" "no")]) - (define_insn "setf_exp_xf" [(set (match_operand:XF 0 "fr_register_operand" "=f") (unspec:XF [(match_operand:DI 1 "register_operand" "r")] @@ -3184,133 +3169,22 @@ "setf.exp %0 = %1" [(set_attr "itanium_class" "frfr")]) + +;; Inline square root. + (define_expand "sqrtsf2" [(set (match_operand:SF 0 "fr_register_operand" "=&f") (sqrt:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))] "TARGET_INLINE_SQRT" { rtx insn; -#if 0 if (TARGET_INLINE_SQRT == INL_MIN_LAT) insn = gen_sqrtsf2_internal_lat (operands[0], operands[1]); else -#else - gcc_assert (TARGET_INLINE_SQRT != INL_MIN_LAT); -#endif - insn = gen_sqrtsf2_internal_thr (operands[0], operands[1]); + insn = gen_sqrtsf2_internal_thr (operands[0], operands[1]); emit_insn (insn); DONE; }) - -;; Latency-optimized square root. -;; FIXME: Implement. - -;; Throughput-optimized square root. - -(define_insn_and_split "sqrtsf2_internal_thr" - [(set (match_operand:SF 0 "fr_register_operand" "=&f") - (sqrt:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG"))) - ;; Register r2 in optimization guide. - (clobber (match_scratch:DI 2 "=r")) - ;; Register f8 in optimization guide - (clobber (match_scratch:XF 3 "=&f")) - ;; Register f9 in optimization guide - (clobber (match_scratch:XF 4 "=&f")) - ;; Register f10 in optimization guide - (clobber (match_scratch:XF 5 "=&f")) - ;; Register p6 in optimization guide. 
- (clobber (match_scratch:BI 6 "=c"))] - "TARGET_INLINE_SQRT == INL_MAX_THR" - "#" - "&& reload_completed" - [ ;; exponent of +1/2 in r2 - (set (match_dup 2) (const_int 65534)) - ;; +1/2 in f8 - (set (match_dup 3) - (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP)) - ;; Step 1 - ;; y0 = 1/sqrt(a) in f7 - (parallel [(set (match_dup 7) - (div:XF (const_int 1) - (unspec:XF [(match_dup 8)] - UNSPEC_FR_SQRT_RECIP_APPROX_RES))) - (set (match_dup 6) - (unspec:BI [(match_dup 8)] - UNSPEC_FR_SQRT_RECIP_APPROX)) - (use (const_int 0))]) - ;; Step 2 - ;; H0 = 1/2 * y0 in f9 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 4) - (plus:XF (mult:XF (match_dup 3) (match_dup 7)) - (match_dup 9))) - (use (const_int 1))])) - ;; Step 3 - ;; S0 = a * y0 in f7 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 7) - (plus:XF (mult:XF (match_dup 8) (match_dup 7)) - (match_dup 9))) - (use (const_int 1))])) - ;; Step 4 - ;; d = 1/2 - S0 * H0 in f10 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 5) - (minus:XF (match_dup 3) - (mult:XF (match_dup 7) (match_dup 4)))) - (use (const_int 1))])) - ;; Step 5 - ;; d' = d + 1/2 * d in f8 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 3) - (plus:XF (mult:XF (match_dup 3) (match_dup 5)) - (match_dup 5))) - (use (const_int 1))])) - ;; Step 6 - ;; e = d + d * d' in f8 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 3) - (plus:XF (mult:XF (match_dup 5) (match_dup 3)) - (match_dup 5))) - (use (const_int 1))])) - ;; Step 7 - ;; S1 = S0 + e * S0 in f7 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 0) - (float_truncate:SF - (plus:XF (mult:XF (match_dup 3) (match_dup 7)) - (match_dup 7)))) - (use (const_int 1))])) - ;; Step 8 - ;; H1 = H0 + e * H0 in f8 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 3) - (plus:XF (mult:XF (match_dup 3) (match_dup 4)) - (match_dup 4))) - (use 
(const_int 1))])) - ;; Step 9 - ;; d1 = a - S1 * S1 in f9 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 4) - (minus:XF (match_dup 8) - (mult:XF (match_dup 7) (match_dup 7)))) - (use (const_int 1))])) - ;; Step 10 - ;; S = S1 + d1 * H1 in f7 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 0) - (float_truncate:SF - (plus:XF (mult:XF (match_dup 4) (match_dup 3)) - (match_dup 7)))) - (use (const_int 0))]))] -{ - /* Generate 82-bit versions of the input and output operands. */ - operands[7] = gen_rtx_REG (XFmode, REGNO (operands[0])); - operands[8] = gen_rtx_REG (XFmode, REGNO (operands[1])); - /* Generate required floating-point constants. */ - operands[9] = CONST0_RTX (XFmode); -} - [(set_attr "predicable" "no")]) ;; :::::::::::::::::::: ;; :: @@ -3546,143 +3420,11 @@ if (TARGET_INLINE_SQRT == INL_MIN_LAT) insn = gen_sqrtdf2_internal_lat (operands[0], operands[1]); else -#else - gcc_assert (TARGET_INLINE_SQRT != INL_MIN_LAT); #endif insn = gen_sqrtdf2_internal_thr (operands[0], operands[1]); emit_insn (insn); DONE; }) - -;; Latency-optimized square root. -;; FIXME: Implement. - -;; Throughput-optimized square root. - -(define_insn_and_split "sqrtdf2_internal_thr" - [(set (match_operand:DF 0 "fr_register_operand" "=&f") - (sqrt:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG"))) - ;; Register r2 in optimization guide. - (clobber (match_scratch:DI 2 "=r")) - ;; Register f8 in optimization guide - (clobber (match_scratch:XF 3 "=&f")) - ;; Register f9 in optimization guide - (clobber (match_scratch:XF 4 "=&f")) - ;; Register f10 in optimization guide - (clobber (match_scratch:XF 5 "=&f")) - ;; Register p6 in optimization guide. 
- (clobber (match_scratch:BI 6 "=c"))] - "TARGET_INLINE_SQRT == INL_MAX_THR" - "#" - "&& reload_completed" - [ ;; exponent of +1/2 in r2 - (set (match_dup 2) (const_int 65534)) - ;; +1/2 in f10 - (set (match_dup 5) - (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP)) - ;; Step 1 - ;; y0 = 1/sqrt(a) in f7 - (parallel [(set (match_dup 7) - (div:XF (const_int 1) - (unspec:XF [(match_dup 8)] - UNSPEC_FR_SQRT_RECIP_APPROX_RES))) - (set (match_dup 6) - (unspec:BI [(match_dup 8)] - UNSPEC_FR_SQRT_RECIP_APPROX)) - (use (const_int 0))]) - ;; Step 2 - ;; H0 = 1/2 * y0 in f8 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 3) - (plus:XF (mult:XF (match_dup 5) (match_dup 7)) - (match_dup 9))) - (use (const_int 1))])) - ;; Step 3 - ;; G0 = a * y0 in f7 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 7) - (plus:XF (mult:XF (match_dup 8) (match_dup 7)) - (match_dup 9))) - (use (const_int 1))])) - ;; Step 4 - ;; r0 = 1/2 - G0 * H0 in f9 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 4) - (minus:XF (match_dup 5) - (mult:XF (match_dup 7) (match_dup 3)))) - (use (const_int 1))])) - ;; Step 5 - ;; H1 = H0 + r0 * H0 in f8 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 3) - (plus:XF (mult:XF (match_dup 4) (match_dup 3)) - (match_dup 3))) - (use (const_int 1))])) - ;; Step 6 - ;; G1 = G0 + r0 * G0 in f7 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 7) - (plus:XF (mult:XF (match_dup 4) (match_dup 7)) - (match_dup 7))) - (use (const_int 1))])) - ;; Step 7 - ;; r1 = 1/2 - G1 * H1 in f9 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 4) - (minus:XF (match_dup 5) - (mult:XF (match_dup 7) (match_dup 3)))) - (use (const_int 1))])) - ;; Step 8 - ;; H2 = H1 + r1 * H1 in f8 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 3) - (plus:XF (mult:XF (match_dup 4) (match_dup 3)) - (match_dup 3))) - (use (const_int 1))])) 
- ;; Step 9 - ;; G2 = G1 + r1 * G1 in f7 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 7) - (plus:XF (mult:XF (match_dup 4) (match_dup 7)) - (match_dup 7))) - (use (const_int 1))])) - ;; Step 10 - ;; d2 = a - G2 * G2 in f9 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 4) - (minus:XF (match_dup 8) - (mult:XF (match_dup 7) (match_dup 7)))) - (use (const_int 1))])) - ;; Step 11 - ;; G3 = G2 + d2 * H2 in f7 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 7) - (plus:XF (mult:XF (match_dup 4) (match_dup 3)) - (match_dup 7))) - (use (const_int 1))])) - ;; Step 12 - ;; d3 = a - G3 * G3 in f9 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 4) - (minus:XF (match_dup 8) - (mult:XF (match_dup 7) (match_dup 7)))) - (use (const_int 1))])) - ;; Step 13 - ;; S = G3 + d3 * H2 in f7 - (cond_exec (ne (match_dup 6) (const_int 0)) - (parallel [(set (match_dup 0) - (float_truncate:DF - (plus:XF (mult:XF (match_dup 4) (match_dup 3)) - (match_dup 7)))) - (use (const_int 0))]))] -{ - /* Generate 82-bit versions of the input and output operands. */ - operands[7] = gen_rtx_REG (XFmode, REGNO (operands[0])); - operands[8] = gen_rtx_REG (XFmode, REGNO (operands[1])); - /* Generate required floating-point constants. */ - operands[9] = CONST0_RTX (XFmode); -} - [(set_attr "predicable" "no")]) ;; :::::::::::::::::::: ;; :: @@ -4056,163 +3798,11 @@ "TARGET_INLINE_SQRT" { rtx insn; -#if 0 - if (TARGET_INLINE_SQRT == INL_MIN_LAT) - insn = gen_sqrtxf2_internal_lat (operands[0], operands[1]); - else -#else - gcc_assert (TARGET_INLINE_SQRT != INL_MIN_LAT); -#endif - insn = gen_sqrtxf2_internal_thr (operands[0], operands[1]); + insn = gen_sqrtxf2_internal (operands[0], operands[1]); emit_insn (insn); DONE; }) -;; Latency-optimized square root. -;; FIXME: Implement. - -;; Throughput-optimized square root. 
- -(define_insn_and_split "sqrtxf2_internal_thr" - [(set (match_operand:XF 0 "fr_register_operand" "=&f") - (sqrt:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG"))) - ;; Register r2 in optimization guide. - (clobber (match_scratch:DI 2 "=r")) - ;; Register f8 in optimization guide - (clobber (match_scratch:XF 3 "=&f")) - ;; Register f9 in optimization guide - (clobber (match_scratch:XF 4 "=&f")) - ;; Register f10 in optimization guide - (clobber (match_scratch:XF 5 "=&f")) - ;; Register f11 in optimization guide - (clobber (match_scratch:XF 6 "=&f")) - ;; Register p6 in optimization guide. - (clobber (match_scratch:BI 7 "=c"))] - "TARGET_INLINE_SQRT == INL_MAX_THR" - "#" - "&& reload_completed" - [ ;; exponent of +1/2 in r2 - (set (match_dup 2) (const_int 65534)) - ;; +1/2 in f8. The Intel manual mistakenly specifies f10. - (set (match_dup 3) - (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP)) - ;; Step 1 - ;; y0 = 1/sqrt(a) in f7 - (parallel [(set (match_dup 8) - (div:XF (const_int 1) - (unspec:XF [(match_dup 9)] - UNSPEC_FR_SQRT_RECIP_APPROX_RES))) - (set (match_dup 7) - (unspec:BI [(match_dup 9)] - UNSPEC_FR_SQRT_RECIP_APPROX)) - (use (const_int 0))]) - ;; Step 2 - ;; H0 = 1/2 * y0 in f9 - (cond_exec (ne (match_dup 7) (const_int 0)) - (parallel [(set (match_dup 4) - (plus:XF (mult:XF (match_dup 3) (match_dup 8)) - (match_dup 10))) - (use (const_int 1))])) - ;; Step 3 - ;; S0 = a * y0 in f7 - (cond_exec (ne (match_dup 7) (const_int 0)) - (parallel [(set (match_dup 8) - (plus:XF (mult:XF (match_dup 9) (match_dup 8)) - (match_dup 10))) - (use (const_int 1))])) - ;; Step 4 - ;; d0 = 1/2 - S0 * H0 in f10 - (cond_exec (ne (match_dup 7) (const_int 0)) - (parallel [(set (match_dup 5) - (minus:XF (match_dup 3) - (mult:XF (match_dup 8) (match_dup 4)))) - (use (const_int 1))])) - ;; Step 5 - ;; H1 = H0 + d0 * H0 in f9 - (cond_exec (ne (match_dup 7) (const_int 0)) - (parallel [(set (match_dup 4) - (plus:XF (mult:XF (match_dup 5) (match_dup 4)) - (match_dup 4))) - (use 
(const_int 1))])) - ;; Step 6 - ;; S1 = S0 + d0 * S0 in f7 - (cond_exec (ne (match_dup 7) (const_int 0)) - (parallel [(set (match_dup 8) - (plus:XF (mult:XF (match_dup 5) (match_dup 8)) - (match_dup 8))) - (use (const_int 1))])) - ;; Step 7 - ;; d1 = 1/2 - S1 * H1 in f10 - (cond_exec (ne (match_dup 7) (const_int 0)) - (parallel [(set (match_dup 5) - (minus:XF (match_dup 3) - (mult:XF (match_dup 8) (match_dup 4)))) - (use (const_int 1))])) - ;; Step 8 - ;; H2 = H1 + d1 * H1 in f9 - (cond_exec (ne (match_dup 7) (const_int 0)) - (parallel [(set (match_dup 4) - (plus:XF (mult:XF (match_dup 5) (match_dup 4)) - (match_dup 4))) - (use (const_int 1))])) - ;; Step 9 - ;; S2 = S1 + d1 * S1 in f7 - (cond_exec (ne (match_dup 7) (const_int 0)) - (parallel [(set (match_dup 8) - (plus:XF (mult:XF (match_dup 5) (match_dup 8)) - (match_dup 8))) - (use (const_int 1))])) - ;; Step 10 - ;; d2 = 1/2 - S2 * H2 in f10 - (cond_exec (ne (match_dup 7) (const_int 0)) - (parallel [(set (match_dup 5) - (minus:XF (match_dup 3) - (mult:XF (match_dup 8) (match_dup 4)))) - (use (const_int 1))])) - ;; Step 11 - ;; e2 = a - S2 * S2 in f8 - (cond_exec (ne (match_dup 7) (const_int 0)) - (parallel [(set (match_dup 3) - (minus:XF (match_dup 9) - (mult:XF (match_dup 8) (match_dup 8)))) - (use (const_int 1))])) - ;; Step 12 - ;; S3 = S2 + e2 * H2 in f7 - (cond_exec (ne (match_dup 7) (const_int 0)) - (parallel [(set (match_dup 8) - (plus:XF (mult:XF (match_dup 3) (match_dup 4)) - (match_dup 8))) - (use (const_int 1))])) - ;; Step 13 - ;; H3 = H2 + d2 * H2 in f9 - (cond_exec (ne (match_dup 7) (const_int 0)) - (parallel [(set (match_dup 4) - (plus:XF (mult:XF (match_dup 5) (match_dup 4)) - (match_dup 4))) - (use (const_int 1))])) - ;; Step 14 - ;; e3 = a - S3 * S3 in f8 - (cond_exec (ne (match_dup 7) (const_int 0)) - (parallel [(set (match_dup 3) - (minus:XF (match_dup 9) - (mult:XF (match_dup 8) (match_dup 8)))) - (use (const_int 1))])) - ;; Step 15 - ;; S = S3 + e3 * H3 in f7 - (cond_exec (ne (match_dup 7) 
(const_int 0)) - (parallel [(set (match_dup 0) - (plus:XF (mult:XF (match_dup 3) (match_dup 4)) - (match_dup 8))) - (use (const_int 0))]))] -{ - /* Generate 82-bit versions of the input and output operands. */ - operands[8] = gen_rtx_REG (XFmode, REGNO (operands[0])); - operands[9] = gen_rtx_REG (XFmode, REGNO (operands[1])); - /* Generate required floating-point constants. */ - operands[10] = CONST0_RTX (XFmode); -} - [(set_attr "predicable" "no")]) - ;; ??? frcpa works like cmp.foo.unc. (define_insn "*recip_approx" |