diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2016-03-06 06:38:21 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2021-07-30 10:34:19 -0700 |
commit | e5e164effa30fd2b5c5bc3e6883d63889e96d8da (patch) | |
tree | 2bc5b9da149ccd05f459cebc9086397430a4cccb /gcc/builtins.c | |
parent | d68d275a00573be49f5e83eba52ce3f26d11db9e (diff) | |
download | gcc-e5e164effa30fd2b5c5bc3e6883d63889e96d8da.zip gcc-e5e164effa30fd2b5c5bc3e6883d63889e96d8da.tar.gz gcc-e5e164effa30fd2b5c5bc3e6883d63889e96d8da.tar.bz2 |
Add QI vector mode support to by-pieces for memset
1. Replace scalar_int_mode with fixed_size_mode in the by-pieces
infrastructure to allow non-integer mode.
2. Rename widest_int_mode_for_size to widest_fixed_size_mode_for_size
to return QI vector mode for memset.
3. Add op_by_pieces_d::smallest_fixed_size_mode_for_size to return the
smallest integer or QI vector mode.
4. Remove clear_by_pieces_1 and use builtin_memset_read_str in
clear_by_pieces to support vector mode broadcast.
5. Add lowpart_subreg_regno, a wrapper around simplify_subreg_regno that
uses subreg_lowpart_offset (mode, prev_mode) as the offset.
6. Add TARGET_GEN_MEMSET_SCRATCH_RTX to allow the backend to use a hard
scratch register to avoid stack realignment when expanding memset.
gcc/
PR middle-end/90773
* builtins.c (builtin_memcpy_read_str): Change the mode argument
from scalar_int_mode to fixed_size_mode.
(builtin_strncpy_read_str): Likewise.
(gen_memset_value_from_prev): New function.
(builtin_memset_read_str): Change the mode argument from
scalar_int_mode to fixed_size_mode. Use gen_memset_value_from_prev
and support CONST_VECTOR.
(builtin_memset_gen_str): Likewise.
(try_store_by_multiple_pieces): Use by_pieces_constfn to declare
constfun.
* builtins.h (builtin_strncpy_read_str): Replace scalar_int_mode
with fixed_size_mode.
(builtin_memset_read_str): Likewise.
* expr.c (widest_int_mode_for_size): Renamed to ...
(widest_fixed_size_mode_for_size): Add a bool argument to
indicate if QI vector mode can be used.
(by_pieces_ninsns): Call widest_fixed_size_mode_for_size
instead of widest_int_mode_for_size.
(pieces_addr::adjust): Change the mode argument from
scalar_int_mode to fixed_size_mode.
(op_by_pieces_d): Make m_len read-only. Add a bool member,
m_qi_vector_mode, to indicate that QI vector mode can be used.
(op_by_pieces_d::op_by_pieces_d): Add a bool argument to
initialize m_qi_vector_mode. Call widest_fixed_size_mode_for_size
instead of widest_int_mode_for_size.
(op_by_pieces_d::get_usable_mode): Change the mode argument from
scalar_int_mode to fixed_size_mode. Call
widest_fixed_size_mode_for_size instead of
widest_int_mode_for_size.
(op_by_pieces_d::smallest_fixed_size_mode_for_size): New member
function to return the smallest integer or QI vector mode.
(op_by_pieces_d::run): Call widest_fixed_size_mode_for_size
instead of widest_int_mode_for_size. Call
smallest_fixed_size_mode_for_size instead of
smallest_int_mode_for_size.
(store_by_pieces_d::store_by_pieces_d): Add a bool argument to
indicate that QI vector mode can be used and pass it to
op_by_pieces_d::op_by_pieces_d.
(can_store_by_pieces): Call widest_fixed_size_mode_for_size
instead of widest_int_mode_for_size. Pass memsetp to
widest_fixed_size_mode_for_size to support QI vector mode.
Allow all CONST_VECTORs for memset if vec_duplicate is supported.
(store_by_pieces): Pass memsetp to
store_by_pieces_d::store_by_pieces_d.
(clear_by_pieces_1): Removed.
(clear_by_pieces): Replace clear_by_pieces_1 with
builtin_memset_read_str and pass true to store_by_pieces_d to
support vector mode broadcast.
(string_cst_read_str): Change the mode argument from
scalar_int_mode to fixed_size_mode.
* expr.h (by_pieces_constfn): Change scalar_int_mode to
fixed_size_mode.
(by_pieces_prev): Likewise.
* rtl.h (lowpart_subreg_regno): New.
* rtlanal.c (lowpart_subreg_regno): New. A wrapper around
simplify_subreg_regno.
* target.def (gen_memset_scratch_rtx): New hook.
* doc/tm.texi.in: Add TARGET_GEN_MEMSET_SCRATCH_RTX.
* doc/tm.texi: Regenerated.
gcc/testsuite/
* gcc.target/i386/pr100865-3.c: Expect vmovdqu8 instead of
vmovdqu.
* gcc.target/i386/pr100865-4b.c: Likewise.
Diffstat (limited to 'gcc/builtins.c')
-rw-r--r-- | gcc/builtins.c | 171 |
1 files changed, 140 insertions, 31 deletions
diff --git a/gcc/builtins.c b/gcc/builtins.c index 845a8bb..2387b5d 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -3119,13 +3119,16 @@ expand_builtin_strnlen (tree exp, rtx target, machine_mode target_mode) static rtx builtin_memcpy_read_str (void *data, void *, HOST_WIDE_INT offset, - scalar_int_mode mode) + fixed_size_mode mode) { /* The REPresentation pointed to by DATA need not be a nul-terminated string but the caller guarantees it's large enough for MODE. */ const char *rep = (const char *) data; - return c_readstr (rep + offset, mode, /*nul_terminated=*/false); + /* The by-pieces infrastructure does not try to pick a vector mode + for memcpy expansion. */ + return c_readstr (rep + offset, as_a <scalar_int_mode> (mode), + /*nul_terminated=*/false); } /* LEN specify length of the block of memcpy/memset operation. @@ -3742,14 +3745,16 @@ expand_builtin_stpncpy (tree exp, rtx) rtx builtin_strncpy_read_str (void *data, void *, HOST_WIDE_INT offset, - scalar_int_mode mode) + fixed_size_mode mode) { const char *str = (const char *) data; if ((unsigned HOST_WIDE_INT) offset > strlen (str)) return const0_rtx; - return c_readstr (str + offset, mode); + /* The by-pieces infrastructure does not try to pick a vector mode + for strncpy expansion. */ + return c_readstr (str + offset, as_a <scalar_int_mode> (mode)); } /* Helper to check the sizes of sequences and the destination of calls @@ -3950,30 +3955,122 @@ expand_builtin_strncpy (tree exp, rtx target) return NULL_RTX; } -/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE) - bytes from constant string DATA + OFFSET and return it as target - constant. If PREV isn't nullptr, it has the RTL info from the +/* Return the RTL of a register in MODE generated from PREV in the previous iteration. */ -rtx -builtin_memset_read_str (void *data, void *prevp, - HOST_WIDE_INT offset ATTRIBUTE_UNUSED, - scalar_int_mode mode) +static rtx +gen_memset_value_from_prev (by_pieces_prev *prev, fixed_size_mode mode) { - by_pieces_prev *prev = (by_pieces_prev *) prevp; + rtx target = nullptr; if (prev != nullptr && prev->data != nullptr) { /* Use the previous data in the same mode. */ if (prev->mode == mode) return prev->data; + + fixed_size_mode prev_mode = prev->mode; + + /* Don't use the previous data to write QImode if it is in a + vector mode. */ + if (VECTOR_MODE_P (prev_mode) && mode == QImode) + return target; + + rtx prev_rtx = prev->data; + + if (REG_P (prev_rtx) + && HARD_REGISTER_P (prev_rtx) + && lowpart_subreg_regno (REGNO (prev_rtx), prev_mode, mode) < 0) + { + /* This case occurs when PREV_MODE is a vector and when + MODE is too small to store using vector operations. + After register allocation, the code will need to move the + lowpart of the vector register into a non-vector register. + + Also, the target has chosen to use a hard register + instead of going with the default choice of using a + pseudo register. We should respect that choice and try to + avoid creating a pseudo register with the same mode as the + current hard register. + + In principle, we could just use a lowpart MODE subreg of + the vector register. However, the vector register mode might + be too wide for non-vector registers, and we already know + that the non-vector mode is too small for vector registers. + It's therefore likely that we'd need to spill to memory in + the vector mode and reload the non-vector value from there. + + Try to avoid that by reducing the vector register to the + smallest size that it can hold. This should increase the + chances that non-vector registers can hold both the inner + and outer modes of the subreg that we generate later. */ + machine_mode m; + fixed_size_mode candidate; + FOR_EACH_MODE_IN_CLASS (m, GET_MODE_CLASS (mode)) + if (is_a<fixed_size_mode> (m, &candidate)) + { + if (GET_MODE_SIZE (candidate) + >= GET_MODE_SIZE (prev_mode)) + break; + if (GET_MODE_SIZE (candidate) >= GET_MODE_SIZE (mode) + && lowpart_subreg_regno (REGNO (prev_rtx), + prev_mode, candidate) >= 0) + { + target = lowpart_subreg (candidate, prev_rtx, + prev_mode); + prev_rtx = target; + prev_mode = candidate; + break; + } + } + if (target == nullptr) + prev_rtx = copy_to_reg (prev_rtx); + } + + target = lowpart_subreg (mode, prev_rtx, prev_mode); } + return target; +} +/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE) + bytes from constant string DATA + OFFSET and return it as target + constant. If PREV isn't nullptr, it has the RTL info from the + previous iteration. */ + +rtx +builtin_memset_read_str (void *data, void *prev, + HOST_WIDE_INT offset ATTRIBUTE_UNUSED, + fixed_size_mode mode) +{ const char *c = (const char *) data; - char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode)); + unsigned int size = GET_MODE_SIZE (mode); + + rtx target = gen_memset_value_from_prev ((by_pieces_prev *) prev, + mode); + if (target != nullptr) + return target; + rtx src = gen_int_mode (*c, QImode); - memset (p, *c, GET_MODE_SIZE (mode)); + if (VECTOR_MODE_P (mode)) + { + gcc_assert (GET_MODE_INNER (mode) == QImode); + + rtx const_vec = gen_const_vec_duplicate (mode, src); + if (prev == NULL) + /* Return CONST_VECTOR when called by a query function. */ + return const_vec; + + /* Use the move expander with CONST_VECTOR. */ + target = targetm.gen_memset_scratch_rtx (mode); + emit_move_insn (target, const_vec); + return target; + } + + char *p = XALLOCAVEC (char, size); + + memset (p, *c, size); - return c_readstr (p, mode); + /* Vector modes should be handled above. */ + return c_readstr (p, as_a <scalar_int_mode> (mode)); } /* Callback routine for store_by_pieces. Return the RTL of a register @@ -3983,33 +4080,45 @@ builtin_memset_read_str (void *data, void *prevp, nullptr, it has the RTL info from the previous iteration. */ static rtx -builtin_memset_gen_str (void *data, void *prevp, +builtin_memset_gen_str (void *data, void *prev, HOST_WIDE_INT offset ATTRIBUTE_UNUSED, - scalar_int_mode mode) + fixed_size_mode mode) { rtx target, coeff; size_t size; char *p; - by_pieces_prev *prev = (by_pieces_prev *) prevp; - if (prev != nullptr && prev->data != nullptr) - { - /* Use the previous data in the same mode. */ - if (prev->mode == mode) - return prev->data; - - target = simplify_gen_subreg (mode, prev->data, prev->mode, 0); - if (target != nullptr) - return target; - } - size = GET_MODE_SIZE (mode); if (size == 1) return (rtx) data; + target = gen_memset_value_from_prev ((by_pieces_prev *) prev, mode); + if (target != nullptr) + return target; + + if (VECTOR_MODE_P (mode)) + { + gcc_assert (GET_MODE_INNER (mode) == QImode); + + /* vec_duplicate_optab is a precondition to pick a vector mode for + the memset expander. */ + insn_code icode = optab_handler (vec_duplicate_optab, mode); + + target = targetm.gen_memset_scratch_rtx (mode); + class expand_operand ops[2]; + create_output_operand (&ops[0], target, mode); + create_input_operand (&ops[1], (rtx) data, QImode); + expand_insn (icode, 2, ops); + if (!rtx_equal_p (target, ops[0].value)) + emit_move_insn (target, ops[0].value); + + return target; + } + p = XALLOCAVEC (char, size); memset (p, 1, size); - coeff = c_readstr (p, mode); + /* Vector modes should be handled above. */ + coeff = c_readstr (p, as_a <scalar_int_mode> (mode)); target = convert_to_mode (mode, (rtx) data, 1); target = expand_mult (mode, target, coeff, NULL_RTX, 1); @@ -4113,7 +4222,7 @@ try_store_by_multiple_pieces (rtx to, rtx len, unsigned int ctz_len, &valc, align, true)) return false; - rtx (*constfun) (void *, void *, HOST_WIDE_INT, scalar_int_mode); + by_pieces_constfn constfun; void *constfundata; if (val) { |