diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2019-06-10 09:57:15 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2021-04-29 04:45:20 -0700 |
commit | 985b3a6837dee7001e6b618f073ed74f0edf5787 (patch) | |
tree | 3607db433c496736c6a9cc22a90714994db0ca24 /gcc/expr.c | |
parent | af4ccaa7515b8e72449448c509916575831e6292 (diff) | |
download | gcc-985b3a6837dee7001e6b618f073ed74f0edf5787.zip gcc-985b3a6837dee7001e6b618f073ed74f0edf5787.tar.gz gcc-985b3a6837dee7001e6b618f073ed74f0edf5787.tar.bz2 |
Generate offset adjusted operation for op_by_pieces operations
Add an overlap_op_by_pieces_p target hook for op_by_pieces operations
between two areas of memory. When the hook returns true, generate one
offset-adjusted operation in the smallest integer mode that covers the
remaining bytes for the last piece of a memory region, instead of doing
more than one smaller operation.
Pass the RTL information from the previous iteration to m_constfn in
the op_by_pieces operation so that builtin_memset_[read|gen]_str can
generate the new RTL from the previous RTL.
Tested on Linux/x86-64.
gcc/
PR middle-end/90773
* builtins.c (builtin_memcpy_read_str): Add a dummy argument.
(builtin_strncpy_read_str): Likewise.
(builtin_memset_read_str): Add an argument for the previous RTL
information and generate the new RTL from the previous RTL info.
(builtin_memset_gen_str): Likewise.
* builtins.h (builtin_strncpy_read_str): Update the prototype.
(builtin_memset_read_str): Likewise.
* expr.c (by_pieces_ninsns): If targetm.overlap_op_by_pieces_p()
returns true, round up size and alignment to the widest integer
mode for maximum size.
(pieces_addr::adjust): Add a pointer to by_pieces_prev argument
and pass it to m_constfn.
(op_by_pieces_d): Add m_push and m_overlap_op_by_pieces.
(op_by_pieces_d::op_by_pieces_d): Add a bool argument to
initialize m_push. Initialize m_overlap_op_by_pieces with
targetm.overlap_op_by_pieces_p ().
(op_by_pieces_d::run): Pass the previous RTL information to
pieces_addr::adjust and generate overlapping operations if
m_overlap_op_by_pieces is true.
(PUSHG_P): New.
(move_by_pieces_d::move_by_pieces_d): Updated for op_by_pieces_d
change.
(store_by_pieces_d::store_by_pieces_d): Updated for op_by_pieces_d
change.
(can_store_by_pieces): Use by_pieces_constfn on constfun.
(store_by_pieces): Use by_pieces_constfn on constfun. Updated
for op_by_pieces_d change.
(clear_by_pieces_1): Add a dummy argument.
(clear_by_pieces): Updated for op_by_pieces_d change.
(compare_by_pieces_d::compare_by_pieces_d): Likewise.
(string_cst_read_str): Add a dummy argument.
* expr.h (by_pieces_constfn): Add a dummy argument.
(by_pieces_prev): New.
* target.def (overlap_op_by_pieces_p): New target hook.
* config/i386/i386.c (TARGET_OVERLAP_OP_BY_PIECES_P): New.
* doc/tm.texi.in: Add TARGET_OVERLAP_OP_BY_PIECES_P.
* doc/tm.texi: Regenerated.
gcc/testsuite/
PR middle-end/90773
* g++.dg/pr90773-1.h: New test.
* g++.dg/pr90773-1a.C: Likewise.
* g++.dg/pr90773-1b.C: Likewise.
* g++.dg/pr90773-1c.C: Likewise.
* g++.dg/pr90773-1d.C: Likewise.
* gcc.target/i386/pr90773-1.c: Likewise.
* gcc.target/i386/pr90773-2.c: Likewise.
* gcc.target/i386/pr90773-3.c: Likewise.
* gcc.target/i386/pr90773-4.c: Likewise.
* gcc.target/i386/pr90773-5.c: Likewise.
* gcc.target/i386/pr90773-6.c: Likewise.
* gcc.target/i386/pr90773-7.c: Likewise.
* gcc.target/i386/pr90773-8.c: Likewise.
* gcc.target/i386/pr90773-9.c: Likewise.
* gcc.target/i386/pr90773-10.c: Likewise.
* gcc.target/i386/pr90773-11.c: Likewise.
* gcc.target/i386/pr90773-12.c: Likewise.
* gcc.target/i386/pr90773-13.c: Likewise.
* gcc.target/i386/pr90773-14.c: Likewise.
Diffstat (limited to 'gcc/expr.c')
-rw-r--r-- | gcc/expr.c | 105 |
1 files changed, 85 insertions, 20 deletions
@@ -815,12 +815,27 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align, unsigned int max_size, by_pieces_operation op) { unsigned HOST_WIDE_INT n_insns = 0; + scalar_int_mode mode; + + if (targetm.overlap_op_by_pieces_p () && op != COMPARE_BY_PIECES) + { + /* NB: Round up L and ALIGN to the widest integer mode for + MAX_SIZE. */ + mode = widest_int_mode_for_size (max_size); + if (optab_handler (mov_optab, mode) != CODE_FOR_nothing) + { + unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode)); + if (up > l) + l = up; + align = GET_MODE_ALIGNMENT (mode); + } + } align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align); while (max_size > 1 && l > 0) { - scalar_int_mode mode = widest_int_mode_for_size (max_size); + mode = widest_int_mode_for_size (max_size); enum insn_code icode; unsigned int modesize = GET_MODE_SIZE (mode); @@ -888,7 +903,8 @@ class pieces_addr void *m_cfndata; public: pieces_addr (rtx, bool, by_pieces_constfn, void *); - rtx adjust (scalar_int_mode, HOST_WIDE_INT); + rtx adjust (scalar_int_mode, HOST_WIDE_INT, + by_pieces_prev * = nullptr); void increment_address (HOST_WIDE_INT); void maybe_predec (HOST_WIDE_INT); void maybe_postinc (HOST_WIDE_INT); @@ -990,10 +1006,12 @@ pieces_addr::decide_autoinc (machine_mode ARG_UNUSED (mode), bool reverse, but we still modify the MEM's properties. */ rtx -pieces_addr::adjust (scalar_int_mode mode, HOST_WIDE_INT offset) +pieces_addr::adjust (scalar_int_mode mode, HOST_WIDE_INT offset, + by_pieces_prev *prev) { if (m_constfn) - return m_constfn (m_cfndata, offset, mode); + /* Pass the previous data to m_constfn. */ + return m_constfn (m_cfndata, prev, offset, mode); if (m_obj == NULL_RTX) return NULL_RTX; if (m_auto) @@ -1051,6 +1069,10 @@ class op_by_pieces_d unsigned int m_align; unsigned int m_max_size; bool m_reverse; + /* True if this is a stack push. */ + bool m_push; + /* True if targetm.overlap_op_by_pieces_p () returns true. 
*/ + bool m_overlap_op_by_pieces; /* Virtual functions, overriden by derived classes for the specific operation. */ @@ -1062,7 +1084,7 @@ class op_by_pieces_d public: op_by_pieces_d (rtx, bool, rtx, bool, by_pieces_constfn, void *, - unsigned HOST_WIDE_INT, unsigned int); + unsigned HOST_WIDE_INT, unsigned int, bool); void run (); }; @@ -1077,10 +1099,11 @@ op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load, by_pieces_constfn from_cfn, void *from_cfn_data, unsigned HOST_WIDE_INT len, - unsigned int align) + unsigned int align, bool push) : m_to (to, to_load, NULL, NULL), m_from (from, from_load, from_cfn, from_cfn_data), - m_len (len), m_max_size (MOVE_MAX_PIECES + 1) + m_len (len), m_max_size (MOVE_MAX_PIECES + 1), + m_push (push) { int toi = m_to.get_addr_inc (); int fromi = m_from.get_addr_inc (); @@ -1109,6 +1132,8 @@ op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load, align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align); m_align = align; + + m_overlap_op_by_pieces = targetm.overlap_op_by_pieces_p (); } /* This function returns the largest usable integer mode for LEN bytes @@ -1145,6 +1170,9 @@ op_by_pieces_d::run () scalar_int_mode mode = widest_int_mode_for_size (m_max_size); mode = get_usable_mode (mode, m_len); + by_pieces_prev to_prev = { nullptr, mode }; + by_pieces_prev from_prev = { nullptr, mode }; + do { unsigned int size = GET_MODE_SIZE (mode); @@ -1155,8 +1183,12 @@ op_by_pieces_d::run () if (m_reverse) m_offset -= size; - to1 = m_to.adjust (mode, m_offset); - from1 = m_from.adjust (mode, m_offset); + to1 = m_to.adjust (mode, m_offset, &to_prev); + to_prev.data = to1; + to_prev.mode = mode; + from1 = m_from.adjust (mode, m_offset, &from_prev); + from_prev.data = from1; + from_prev.mode = mode; m_to.maybe_predec (-(HOST_WIDE_INT)size); m_from.maybe_predec (-(HOST_WIDE_INT)size); @@ -1177,9 +1209,32 @@ op_by_pieces_d::run () if (m_len == 0) return; - /* NB: widest_int_mode_for_size checks SIZE > 1. 
*/ - mode = widest_int_mode_for_size (size); - mode = get_usable_mode (mode, m_len); + if (!m_push && m_overlap_op_by_pieces) + { + /* NB: Generate overlapping operations if it is not a stack + push since stack push must not overlap. Get the smallest + integer mode for M_LEN bytes. */ + mode = smallest_int_mode_for_size (m_len * BITS_PER_UNIT); + mode = get_usable_mode (mode, GET_MODE_SIZE (mode)); + int gap = GET_MODE_SIZE (mode) - m_len; + if (gap > 0) + { + /* If size of MODE > M_LEN, generate the last operation + in MODE for the remaining bytes with ovelapping memory + from the previois operation. */ + if (m_reverse) + m_offset += gap; + else + m_offset -= gap; + m_len += gap; + } + } + else + { + /* NB: widest_int_mode_for_size checks SIZE > 1. */ + mode = widest_int_mode_for_size (size); + mode = get_usable_mode (mode, m_len); + } } while (1); @@ -1190,6 +1245,12 @@ op_by_pieces_d::run () /* Derived class from op_by_pieces_d, providing support for block move operations. */ +#ifdef PUSH_ROUNDING +#define PUSHG_P(to) ((to) == nullptr) +#else +#define PUSHG_P(to) false +#endif + class move_by_pieces_d : public op_by_pieces_d { insn_gen_fn m_gen_fun; @@ -1199,7 +1260,8 @@ class move_by_pieces_d : public op_by_pieces_d public: move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len, unsigned int align) - : op_by_pieces_d (to, false, from, true, NULL, NULL, len, align) + : op_by_pieces_d (to, false, from, true, NULL, NULL, len, align, + PUSHG_P (to)) { } rtx finish_retmode (memop_ret); @@ -1294,7 +1356,8 @@ class store_by_pieces_d : public op_by_pieces_d public: store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data, unsigned HOST_WIDE_INT len, unsigned int align) - : op_by_pieces_d (to, false, NULL_RTX, true, cfn, cfn_data, len, align) + : op_by_pieces_d (to, false, NULL_RTX, true, cfn, cfn_data, len, + align, false) { } rtx finish_retmode (memop_ret); @@ -1349,7 +1412,7 @@ store_by_pieces_d::finish_retmode (memop_ret retmode) int 
can_store_by_pieces (unsigned HOST_WIDE_INT len, - rtx (*constfun) (void *, HOST_WIDE_INT, scalar_int_mode), + by_pieces_constfn constfun, void *constfundata, unsigned int align, bool memsetp) { unsigned HOST_WIDE_INT l; @@ -1396,7 +1459,7 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len, if (reverse) offset -= size; - cst = (*constfun) (constfundata, offset, mode); + cst = (*constfun) (constfundata, nullptr, offset, mode); if (!targetm.legitimate_constant_p (mode, cst)) return 0; @@ -1426,7 +1489,7 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len, rtx store_by_pieces (rtx to, unsigned HOST_WIDE_INT len, - rtx (*constfun) (void *, HOST_WIDE_INT, scalar_int_mode), + by_pieces_constfn constfun, void *constfundata, unsigned int align, bool memsetp, memop_ret retmode) { @@ -1454,7 +1517,7 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len, Return const0_rtx unconditionally. */ static rtx -clear_by_pieces_1 (void *, HOST_WIDE_INT, scalar_int_mode) +clear_by_pieces_1 (void *, void *, HOST_WIDE_INT, scalar_int_mode) { return const0_rtx; } @@ -1490,7 +1553,8 @@ class compare_by_pieces_d : public op_by_pieces_d compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn, void *op1_cfn_data, HOST_WIDE_INT len, int align, rtx_code_label *fail_label) - : op_by_pieces_d (op0, true, op1, true, op1_cfn, op1_cfn_data, len, align) + : op_by_pieces_d (op0, true, op1, true, op1_cfn, op1_cfn_data, len, + align, false) { m_fail_label = fail_label; } @@ -5676,7 +5740,8 @@ emit_storent_insn (rtx to, rtx from) /* Helper function for store_expr storing of STRING_CST. */ static rtx -string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode mode) +string_cst_read_str (void *data, void *, HOST_WIDE_INT offset, + scalar_int_mode mode) { tree str = (tree) data; |