diff options
author | Eric Botcazou <ebotcazou@adacore.com> | 2024-05-08 10:07:56 +0200 |
---|---|---|
committer | Eric Botcazou <ebotcazou@adacore.com> | 2024-05-17 11:45:40 +0200 |
commit | f53f8a859631bef97adba1522a8049a8fce57c1b (patch) | |
tree | cc7bd05030603a58ccf070d26afc73891706eb26 | |
parent | b420e0b920613c42f63252aa2478a8315dc37a13 (diff) | |
download | gcc-f53f8a859631bef97adba1522a8049a8fce57c1b.zip gcc-f53f8a859631bef97adba1522a8049a8fce57c1b.tar.gz gcc-f53f8a859631bef97adba1522a8049a8fce57c1b.tar.bz2 |
Add widening expansion of MULT_HIGHPART_EXPR for integral modes
For integral modes the expansion of MULT_HIGHPART_EXPR requires the presence
of an {s,u}mul_highpart optab whereas, for vector modes, widening expansion
is supported. This adds a widening expansion for integral modes too, which
is in fact already implemented in expmed_mult_highpart_optab.
gcc/
* expmed.h (expmed_mult_highpart_optab): Declare.
* expmed.cc (expmed_mult_highpart_optab): Remove static keyword.
Do not assume that OP1 is a constant integer. Fix pasto: pass !UNSIGNEDP
to expand_binop when trying the highpart optab of the opposite signedness.
(expmed_mult_highpart): Pass OP1 narrowed to MODE in all the calls
to expmed_mult_highpart_optab.
* optabs-query.cc (can_mult_highpart_p): Use 2 for integer widening
and shift subsequent values accordingly.
* optabs.cc (expand_mult_highpart): Call expmed_mult_highpart_optab
when can_mult_highpart_p returns 2 and adjust to above change.
-rw-r--r-- | gcc/expmed.cc | 54 | ||||
-rw-r--r-- | gcc/expmed.h | 2 | ||||
-rw-r--r-- | gcc/optabs-query.cc | 24 | ||||
-rw-r--r-- | gcc/optabs.cc | 7 |
4 files changed, 52 insertions, 35 deletions
diff --git a/gcc/expmed.cc b/gcc/expmed.cc index 248940f..50d2276 100644 --- a/gcc/expmed.cc +++ b/gcc/expmed.cc @@ -2748,8 +2748,7 @@ static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx, static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); static rtx extract_high_half (scalar_int_mode, rtx); static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int); -static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx, - int, int); + /* Compute and return the best algorithm for multiplying by T. The algorithm must cost less than cost_limit If retval.cost >= COST_LIMIT, no algorithm was found and all @@ -3856,30 +3855,25 @@ extract_high_half (scalar_int_mode mode, rtx op) return convert_modes (mode, wider_mode, op, 0); } -/* Like expmed_mult_highpart, but only consider using a multiplication - optab. OP1 is an rtx for the constant operand. */ +/* Like expmed_mult_highpart, but only consider using multiplication optab. */ -static rtx +rtx expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1, rtx target, int unsignedp, int max_cost) { - rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); + const scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require (); + const bool speed = optimize_insn_for_speed_p (); + const int size = GET_MODE_BITSIZE (mode); optab moptab; rtx tem; - int size; - bool speed = optimize_insn_for_speed_p (); - - scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require (); - - size = GET_MODE_BITSIZE (mode); /* Firstly, try using a multiplication insn that only generates the needed high part of the product, and in the sign flavor of unsignedp. */ if (mul_highpart_cost (speed, mode) < max_cost) { moptab = unsignedp ? 
umul_highpart_optab : smul_highpart_optab; - tem = expand_binop (mode, moptab, op0, narrow_op1, target, - unsignedp, OPTAB_DIRECT); + tem = expand_binop (mode, moptab, op0, op1, target, unsignedp, + OPTAB_DIRECT); if (tem) return tem; } @@ -3892,12 +3886,12 @@ expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1, + 4 * add_cost (speed, mode) < max_cost)) { moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; - tem = expand_binop (mode, moptab, op0, narrow_op1, target, - unsignedp, OPTAB_DIRECT); + tem = expand_binop (mode, moptab, op0, op1, target, !unsignedp, + OPTAB_DIRECT); if (tem) /* We used the wrong signedness. Adjust the result. */ - return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, - tem, unsignedp); + return expand_mult_highpart_adjust (mode, tem, op0, op1, tem, + unsignedp); } /* Try widening multiplication. */ @@ -3905,8 +3899,8 @@ expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1, if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing && mul_widen_cost (speed, wider_mode) < max_cost) { - tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, - unsignedp, OPTAB_WIDEN); + tem = expand_binop (wider_mode, moptab, op0, op1, NULL_RTX, unsignedp, + OPTAB_WIDEN); if (tem) return extract_high_half (mode, tem); } @@ -3947,14 +3941,14 @@ expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1, + 2 * shift_cost (speed, mode, size-1) + 4 * add_cost (speed, mode) < max_cost)) { - tem = expand_binop (wider_mode, moptab, op0, narrow_op1, - NULL_RTX, ! unsignedp, OPTAB_WIDEN); + tem = expand_binop (wider_mode, moptab, op0, op1, NULL_RTX, !unsignedp, + OPTAB_WIDEN); if (tem != 0) { tem = extract_high_half (mode, tem); /* We used the wrong signedness. Adjust the result. 
*/ - return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, - target, unsignedp); + return expand_mult_highpart_adjust (mode, tem, op0, op1, target, + unsignedp); } } @@ -3976,18 +3970,19 @@ static rtx expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1, rtx target, int unsignedp, int max_cost) { + const bool speed = optimize_insn_for_speed_p (); unsigned HOST_WIDE_INT cnst1; int extra_cost; bool sign_adjust = false; enum mult_variant variant; struct algorithm alg; - rtx tem; - bool speed = optimize_insn_for_speed_p (); + rtx narrow_op1, tem; /* We can't support modes wider than HOST_BITS_PER_INT. */ gcc_assert (HWI_COMPUTABLE_MODE_P (mode)); cnst1 = INTVAL (op1) & GET_MODE_MASK (mode); + narrow_op1 = gen_int_mode (INTVAL (op1), mode); /* We can't optimize modes wider than BITS_PER_WORD. ??? We might be able to perform double-word arithmetic if @@ -3995,7 +3990,7 @@ expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1, synth_mult etc. assume single-word operations. */ scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require (); if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) - return expmed_mult_highpart_optab (mode, op0, op1, target, + return expmed_mult_highpart_optab (mode, op0, narrow_op1, target, unsignedp, max_cost); extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1); @@ -4013,7 +4008,8 @@ expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1, { /* See whether the specialized multiplication optabs are cheaper than the shift/add version. 
*/ - tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, + tem = expmed_mult_highpart_optab (mode, op0, narrow_op1, target, + unsignedp, alg.cost.cost + extra_cost); if (tem) return tem; @@ -4028,7 +4024,7 @@ expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1, return tem; } - return expmed_mult_highpart_optab (mode, op0, op1, target, + return expmed_mult_highpart_optab (mode, op0, narrow_op1, target, unsignedp, max_cost); } diff --git a/gcc/expmed.h b/gcc/expmed.h index f5375c8..0a19176 100644 --- a/gcc/expmed.h +++ b/gcc/expmed.h @@ -724,5 +724,7 @@ extern rtx extract_low_bits (machine_mode, machine_mode, rtx); extern rtx expand_mult (machine_mode, rtx, rtx, rtx, int, bool = false); extern rtx expand_mult_highpart_adjust (scalar_int_mode, rtx, rtx, rtx, rtx, int); +extern rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx, + int, int); #endif // EXPMED_H diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc index e36a150..de145be 100644 --- a/gcc/optabs-query.cc +++ b/gcc/optabs-query.cc @@ -502,19 +502,35 @@ find_widening_optab_handler_and_mode (optab op, machine_mode to_mode, return CODE_FOR_nothing; } -/* Return non-zero if a highpart multiply is supported of can be synthisized. +/* Return non-zero if a highpart multiply is supported or can be synthesized. For the benefit of expand_mult_highpart, the return value is 1 for direct, - 2 for even/odd widening, and 3 for hi/lo widening. */ + 2 for integral widening, 3 for even/odd widening, 4 for hi/lo widening. */ int can_mult_highpart_p (machine_mode mode, bool uns_p) { optab op; + scalar_int_mode int_mode; op = uns_p ? umul_highpart_optab : smul_highpart_optab; if (optab_handler (op, mode) != CODE_FOR_nothing) return 1; + /* If the mode is integral, synth from widening or larger operations. */ + if (is_a <scalar_int_mode> (mode, &int_mode)) + { + scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (int_mode).require (); + + op = uns_p ? 
umul_widen_optab : smul_widen_optab; + if (convert_optab_handler (op, wider_mode, mode) != CODE_FOR_nothing) + return 2; + + /* The test on the size comes from expmed_mult_highpart_optab. */ + if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing + && GET_MODE_BITSIZE (int_mode) - 1 < BITS_PER_WORD) + return 2; + } + /* If the mode is an integral vector, synth from widening operations. */ if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) return 0; @@ -535,7 +551,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) + ((i & 1) ? nunits : 0)); vec_perm_indices indices (sel, 2, nunits); if (can_vec_perm_const_p (mode, mode, indices)) - return 2; + return 3; } } @@ -551,7 +567,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1)); vec_perm_indices indices (sel, 2, nunits); if (can_vec_perm_const_p (mode, mode, indices)) - return 3; + return 4; } } diff --git a/gcc/optabs.cc b/gcc/optabs.cc index ce91f94..e791388 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -6751,10 +6751,13 @@ expand_mult_highpart (machine_mode mode, rtx op0, rtx op1, return expand_binop (mode, tab1, op0, op1, target, uns_p, OPTAB_LIB_WIDEN); case 2: + return expmed_mult_highpart_optab (as_a <scalar_int_mode> (mode), + op0, op1, target, uns_p, INT_MAX); + case 3: tab1 = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab; tab2 = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab; break; - case 3: + case 4: tab1 = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; tab2 = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab; if (BYTES_BIG_ENDIAN) @@ -6783,7 +6786,7 @@ expand_mult_highpart (machine_mode mode, rtx op0, rtx op1, m2 = gen_lowpart (mode, eops[0].value); vec_perm_builder sel; - if (method == 2) + if (method == 3) { /* The encoding has 2 interleaved stepped patterns. */ sel.new_vector (GET_MODE_NUNITS (mode), 2, 3); |