aboutsummaryrefslogtreecommitdiff
path: root/gcc/builtins.c
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2016-03-06 06:38:21 -0800
committerH.J. Lu <hjl.tools@gmail.com>2021-07-30 10:34:19 -0700
commite5e164effa30fd2b5c5bc3e6883d63889e96d8da (patch)
tree2bc5b9da149ccd05f459cebc9086397430a4cccb /gcc/builtins.c
parentd68d275a00573be49f5e83eba52ce3f26d11db9e (diff)
downloadgcc-e5e164effa30fd2b5c5bc3e6883d63889e96d8da.zip
gcc-e5e164effa30fd2b5c5bc3e6883d63889e96d8da.tar.gz
gcc-e5e164effa30fd2b5c5bc3e6883d63889e96d8da.tar.bz2
Add QI vector mode support to by-pieces for memset
1. Replace scalar_int_mode with fixed_size_mode in the by-pieces infrastructure to allow non-integer mode. 2. Rename widest_int_mode_for_size to widest_fixed_size_mode_for_size to return QI vector mode for memset. 3. Add op_by_pieces_d::smallest_fixed_size_mode_for_size to return the smallest integer or QI vector mode. 4. Remove clear_by_pieces_1 and use builtin_memset_read_str in clear_by_pieces to support vector mode broadcast. 5. Add lowpart_subreg_regno, a wrapper around simplify_subreg_regno that uses subreg_lowpart_offset (mode, prev_mode) as the offset. 6. Add TARGET_GEN_MEMSET_SCRATCH_RTX to allow the backend to use a hard scratch register to avoid stack realignment when expanding memset. gcc/ PR middle-end/90773 * builtins.c (builtin_memcpy_read_str): Change the mode argument from scalar_int_mode to fixed_size_mode. (builtin_strncpy_read_str): Likewise. (gen_memset_value_from_prev): New function. (builtin_memset_read_str): Change the mode argument from scalar_int_mode to fixed_size_mode. Use gen_memset_value_from_prev and support CONST_VECTOR. (builtin_memset_gen_str): Likewise. (try_store_by_multiple_pieces): Use by_pieces_constfn to declare constfun. * builtins.h (builtin_strncpy_read_str): Replace scalar_int_mode with fixed_size_mode. (builtin_memset_read_str): Likewise. * expr.c (widest_int_mode_for_size): Renamed to ... (widest_fixed_size_mode_for_size): Add a bool argument to indicate if QI vector mode can be used. (by_pieces_ninsns): Call widest_fixed_size_mode_for_size instead of widest_int_mode_for_size. (pieces_addr::adjust): Change the mode argument from scalar_int_mode to fixed_size_mode. (op_by_pieces_d): Make m_len read-only. Add a bool member, m_qi_vector_mode, to indicate that QI vector mode can be used. (op_by_pieces_d::op_by_pieces_d): Add a bool argument to initialize m_qi_vector_mode. Call widest_fixed_size_mode_for_size instead of widest_int_mode_for_size. (op_by_pieces_d::get_usable_mode): Change the mode argument from scalar_int_mode to fixed_size_mode. Call widest_fixed_size_mode_for_size instead of widest_int_mode_for_size. (op_by_pieces_d::smallest_fixed_size_mode_for_size): New member function to return the smallest integer or QI vector mode. (op_by_pieces_d::run): Call widest_fixed_size_mode_for_size instead of widest_int_mode_for_size. Call smallest_fixed_size_mode_for_size instead of smallest_int_mode_for_size. (store_by_pieces_d::store_by_pieces_d): Add a bool argument to indicate that QI vector mode can be used and pass it to op_by_pieces_d::op_by_pieces_d. (can_store_by_pieces): Call widest_fixed_size_mode_for_size instead of widest_int_mode_for_size. Pass memsetp to widest_fixed_size_mode_for_size to support QI vector mode. Allow all CONST_VECTORs for memset if vec_duplicate is supported. (store_by_pieces): Pass memsetp to store_by_pieces_d::store_by_pieces_d. (clear_by_pieces_1): Removed. (clear_by_pieces): Replace clear_by_pieces_1 with builtin_memset_read_str and pass true to store_by_pieces_d to support vector mode broadcast. (string_cst_read_str): Change the mode argument from scalar_int_mode to fixed_size_mode. * expr.h (by_pieces_constfn): Change scalar_int_mode to fixed_size_mode. (by_pieces_prev): Likewise. * rtl.h (lowpart_subreg_regno): New. * rtlanal.c (lowpart_subreg_regno): New. A wrapper around simplify_subreg_regno. * target.def (gen_memset_scratch_rtx): New hook. * doc/tm.texi.in: Add TARGET_GEN_MEMSET_SCRATCH_RTX. * doc/tm.texi: Regenerated. gcc/testsuite/ * gcc.target/i386/pr100865-3.c: Expect vmovdqu8 instead of vmovdqu. * gcc.target/i386/pr100865-4b.c: Likewise.
Diffstat (limited to 'gcc/builtins.c')
-rw-r--r--gcc/builtins.c171
1 files changed, 140 insertions, 31 deletions
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 845a8bb..2387b5d 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -3119,13 +3119,16 @@ expand_builtin_strnlen (tree exp, rtx target, machine_mode target_mode)
static rtx
builtin_memcpy_read_str (void *data, void *, HOST_WIDE_INT offset,
- scalar_int_mode mode)
+ fixed_size_mode mode)
{
/* The REPresentation pointed to by DATA need not be a nul-terminated
string but the caller guarantees it's large enough for MODE. */
const char *rep = (const char *) data;
- return c_readstr (rep + offset, mode, /*nul_terminated=*/false);
+ /* The by-pieces infrastructure does not try to pick a vector mode
+ for memcpy expansion. */
+ return c_readstr (rep + offset, as_a <scalar_int_mode> (mode),
+ /*nul_terminated=*/false);
}
/* LEN specify length of the block of memcpy/memset operation.
@@ -3742,14 +3745,16 @@ expand_builtin_stpncpy (tree exp, rtx)
rtx
builtin_strncpy_read_str (void *data, void *, HOST_WIDE_INT offset,
- scalar_int_mode mode)
+ fixed_size_mode mode)
{
const char *str = (const char *) data;
if ((unsigned HOST_WIDE_INT) offset > strlen (str))
return const0_rtx;
- return c_readstr (str + offset, mode);
+ /* The by-pieces infrastructure does not try to pick a vector mode
+ for strncpy expansion. */
+ return c_readstr (str + offset, as_a <scalar_int_mode> (mode));
}
/* Helper to check the sizes of sequences and the destination of calls
@@ -3950,30 +3955,122 @@ expand_builtin_strncpy (tree exp, rtx target)
return NULL_RTX;
}
-/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE)
- bytes from constant string DATA + OFFSET and return it as target
- constant. If PREV isn't nullptr, it has the RTL info from the
+/* Return the RTL of a register in MODE generated from PREV in the
previous iteration. */
-rtx
-builtin_memset_read_str (void *data, void *prevp,
- HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
- scalar_int_mode mode)
+static rtx
+gen_memset_value_from_prev (by_pieces_prev *prev, fixed_size_mode mode)
{
- by_pieces_prev *prev = (by_pieces_prev *) prevp;
+ rtx target = nullptr;
if (prev != nullptr && prev->data != nullptr)
{
/* Use the previous data in the same mode. */
if (prev->mode == mode)
return prev->data;
+
+ fixed_size_mode prev_mode = prev->mode;
+
+ /* Don't use the previous data to write QImode if it is in a
+ vector mode. */
+ if (VECTOR_MODE_P (prev_mode) && mode == QImode)
+ return target;
+
+ rtx prev_rtx = prev->data;
+
+ if (REG_P (prev_rtx)
+ && HARD_REGISTER_P (prev_rtx)
+ && lowpart_subreg_regno (REGNO (prev_rtx), prev_mode, mode) < 0)
+ {
+ /* This case occurs when PREV_MODE is a vector and when
+ MODE is too small to store using vector operations.
+ After register allocation, the code will need to move the
+ lowpart of the vector register into a non-vector register.
+
+ Also, the target has chosen to use a hard register
+ instead of going with the default choice of using a
+ pseudo register. We should respect that choice and try to
+ avoid creating a pseudo register with the same mode as the
+ current hard register.
+
+ In principle, we could just use a lowpart MODE subreg of
+ the vector register. However, the vector register mode might
+ be too wide for non-vector registers, and we already know
+ that the non-vector mode is too small for vector registers.
+ It's therefore likely that we'd need to spill to memory in
+ the vector mode and reload the non-vector value from there.
+
+ Try to avoid that by reducing the vector register to the
+ smallest size that it can hold. This should increase the
+ chances that non-vector registers can hold both the inner
+ and outer modes of the subreg that we generate later. */
+ machine_mode m;
+ fixed_size_mode candidate;
+ FOR_EACH_MODE_IN_CLASS (m, GET_MODE_CLASS (mode))
+ if (is_a<fixed_size_mode> (m, &candidate))
+ {
+ if (GET_MODE_SIZE (candidate)
+ >= GET_MODE_SIZE (prev_mode))
+ break;
+ if (GET_MODE_SIZE (candidate) >= GET_MODE_SIZE (mode)
+ && lowpart_subreg_regno (REGNO (prev_rtx),
+ prev_mode, candidate) >= 0)
+ {
+ target = lowpart_subreg (candidate, prev_rtx,
+ prev_mode);
+ prev_rtx = target;
+ prev_mode = candidate;
+ break;
+ }
+ }
+ if (target == nullptr)
+ prev_rtx = copy_to_reg (prev_rtx);
+ }
+
+ target = lowpart_subreg (mode, prev_rtx, prev_mode);
}
+ return target;
+}
+/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE)
+ bytes from constant string DATA + OFFSET and return it as target
+ constant. If PREV isn't nullptr, it has the RTL info from the
+ previous iteration. */
+
+rtx
+builtin_memset_read_str (void *data, void *prev,
+ HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
+ fixed_size_mode mode)
+{
const char *c = (const char *) data;
- char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
+ unsigned int size = GET_MODE_SIZE (mode);
+
+ rtx target = gen_memset_value_from_prev ((by_pieces_prev *) prev,
+ mode);
+ if (target != nullptr)
+ return target;
+ rtx src = gen_int_mode (*c, QImode);
- memset (p, *c, GET_MODE_SIZE (mode));
+ if (VECTOR_MODE_P (mode))
+ {
+ gcc_assert (GET_MODE_INNER (mode) == QImode);
+
+ rtx const_vec = gen_const_vec_duplicate (mode, src);
+ if (prev == NULL)
+ /* Return CONST_VECTOR when called by a query function. */
+ return const_vec;
+
+ /* Use the move expander with CONST_VECTOR. */
+ target = targetm.gen_memset_scratch_rtx (mode);
+ emit_move_insn (target, const_vec);
+ return target;
+ }
+
+ char *p = XALLOCAVEC (char, size);
+
+ memset (p, *c, size);
- return c_readstr (p, mode);
+ /* Vector modes should be handled above. */
+ return c_readstr (p, as_a <scalar_int_mode> (mode));
}
/* Callback routine for store_by_pieces. Return the RTL of a register
@@ -3983,33 +4080,45 @@ builtin_memset_read_str (void *data, void *prevp,
nullptr, it has the RTL info from the previous iteration. */
static rtx
-builtin_memset_gen_str (void *data, void *prevp,
+builtin_memset_gen_str (void *data, void *prev,
HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
- scalar_int_mode mode)
+ fixed_size_mode mode)
{
rtx target, coeff;
size_t size;
char *p;
- by_pieces_prev *prev = (by_pieces_prev *) prevp;
- if (prev != nullptr && prev->data != nullptr)
- {
- /* Use the previous data in the same mode. */
- if (prev->mode == mode)
- return prev->data;
-
- target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
- if (target != nullptr)
- return target;
- }
-
size = GET_MODE_SIZE (mode);
if (size == 1)
return (rtx) data;
+ target = gen_memset_value_from_prev ((by_pieces_prev *) prev, mode);
+ if (target != nullptr)
+ return target;
+
+ if (VECTOR_MODE_P (mode))
+ {
+ gcc_assert (GET_MODE_INNER (mode) == QImode);
+
+ /* vec_duplicate_optab is a precondition to pick a vector mode for
+ the memset expander. */
+ insn_code icode = optab_handler (vec_duplicate_optab, mode);
+
+ target = targetm.gen_memset_scratch_rtx (mode);
+ class expand_operand ops[2];
+ create_output_operand (&ops[0], target, mode);
+ create_input_operand (&ops[1], (rtx) data, QImode);
+ expand_insn (icode, 2, ops);
+ if (!rtx_equal_p (target, ops[0].value))
+ emit_move_insn (target, ops[0].value);
+
+ return target;
+ }
+
p = XALLOCAVEC (char, size);
memset (p, 1, size);
- coeff = c_readstr (p, mode);
+ /* Vector modes should be handled above. */
+ coeff = c_readstr (p, as_a <scalar_int_mode> (mode));
target = convert_to_mode (mode, (rtx) data, 1);
target = expand_mult (mode, target, coeff, NULL_RTX, 1);
@@ -4113,7 +4222,7 @@ try_store_by_multiple_pieces (rtx to, rtx len, unsigned int ctz_len,
&valc, align, true))
return false;
- rtx (*constfun) (void *, void *, HOST_WIDE_INT, scalar_int_mode);
+ by_pieces_constfn constfun;
void *constfundata;
if (val)
{