author | Thomas Koenig <tkoenig@gcc.gnu.org> | 2021-09-13 19:49:49 +0200
committer | Thomas Koenig <tkoenig@gcc.gnu.org> | 2021-09-13 19:49:49 +0200
commit | b18a97e5dd0935e1c4a626c230f21457d0aad3d5 (patch)
tree | c1818f41af6fe780deafb6cd6a183f32085fe654 /gcc/expr.c
parent | e76a53644c9d70e998c0d050e9a456af388c6b61 (diff)
Merged current trunk to branch.
Diffstat (limited to 'gcc/expr.c')
-rw-r--r-- | gcc/expr.c | 705 |
1 file changed, 510 insertions, 195 deletions
@@ -1,5 +1,5 @@
 /* Convert tree expression to rtl instructions, for GNU compiler.
-   Copyright (C) 1988-2020 Free Software Foundation, Inc.
+   Copyright (C) 1988-2021 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -62,6 +62,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "ccmp.h"
 #include "gimple-fold.h"
 #include "rtx-vector-builder.h"
+#include "tree-pretty-print.h"
+#include "flags.h"
 
 /* If this is nonzero, we do not bother generating VOLATILE
@@ -234,8 +236,27 @@ convert_move (rtx to, rtx from, int unsignedp)
	  >= GET_MODE_PRECISION (to_int_mode))
       && SUBREG_CHECK_PROMOTED_SIGN (from, unsignedp))
     {
+      scalar_int_mode int_orig_mode;
+      scalar_int_mode int_inner_mode;
+      machine_mode orig_mode = GET_MODE (from);
+
       from = gen_lowpart (to_int_mode, SUBREG_REG (from));
       from_mode = to_int_mode;
+
+      /* Preserve SUBREG_PROMOTED_VAR_P if the new mode is wider than
+	 the original mode, but narrower than the inner mode.  */
+      if (GET_CODE (from) == SUBREG
+	  && is_a <scalar_int_mode> (orig_mode, &int_orig_mode)
+	  && GET_MODE_PRECISION (to_int_mode)
+	     > GET_MODE_PRECISION (int_orig_mode)
+	  && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (from)),
				     &int_inner_mode)
+	  && GET_MODE_PRECISION (int_inner_mode)
+	     > GET_MODE_PRECISION (to_int_mode))
+	{
+	  SUBREG_PROMOTED_VAR_P (from) = 1;
+	  SUBREG_PROMOTED_SET (from, unsignedp);
+	}
     }
 
   gcc_assert (GET_CODE (to) != SUBREG || !SUBREG_PROMOTED_VAR_P (to));
@@ -686,7 +707,27 @@ convert_modes (machine_mode mode, machine_mode oldmode, rtx x, int unsignedp)
       && (GET_MODE_PRECISION (subreg_promoted_mode (x))
	  >= GET_MODE_PRECISION (int_mode))
       && SUBREG_CHECK_PROMOTED_SIGN (x, unsignedp))
-    x = gen_lowpart (int_mode, SUBREG_REG (x));
+    {
+      scalar_int_mode int_orig_mode;
+      scalar_int_mode int_inner_mode;
+      machine_mode orig_mode = GET_MODE (x);
+      x = gen_lowpart (int_mode, SUBREG_REG (x));
+
+      /* Preserve SUBREG_PROMOTED_VAR_P if the new mode is wider than
+	 the original mode, but narrower than the inner mode.  */
+      if (GET_CODE (x) == SUBREG
+	  && is_a <scalar_int_mode> (orig_mode, &int_orig_mode)
+	  && GET_MODE_PRECISION (int_mode)
+	     > GET_MODE_PRECISION (int_orig_mode)
+	  && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (x)),
				     &int_inner_mode)
+	  && GET_MODE_PRECISION (int_inner_mode)
+	     > GET_MODE_PRECISION (int_mode))
+	{
+	  SUBREG_PROMOTED_VAR_P (x) = 1;
+	  SUBREG_PROMOTED_SET (x, unsignedp);
+	}
+    }
 
   if (GET_MODE (x) != VOIDmode)
     oldmode = GET_MODE (x);
@@ -744,7 +785,7 @@ static unsigned int
 alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
 {
   scalar_int_mode tmode
-    = int_mode_for_size (max_pieces * BITS_PER_UNIT, 1).require ();
+    = int_mode_for_size (max_pieces * BITS_PER_UNIT, 0).require ();
 
   if (align >= GET_MODE_ALIGNMENT (tmode))
     align = GET_MODE_ALIGNMENT (tmode);
@@ -767,15 +808,36 @@ alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
   return align;
 }
 
-/* Return the widest integer mode that is narrower than SIZE bytes.  */
+/* Return the widest QI vector, if QI_MODE is true, or integer mode
+   that is narrower than SIZE bytes.  */
 
-static scalar_int_mode
-widest_int_mode_for_size (unsigned int size)
+static fixed_size_mode
+widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector)
 {
-  scalar_int_mode result = NARROWEST_INT_MODE;
+  fixed_size_mode result = NARROWEST_INT_MODE;
 
   gcc_checking_assert (size > 1);
 
+  /* Use QI vector only if size is wider than a WORD.  */
+  if (qi_vector && size > UNITS_PER_WORD)
+    {
+      machine_mode mode;
+      fixed_size_mode candidate;
+      FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
+	if (is_a<fixed_size_mode> (mode, &candidate)
+	    && GET_MODE_INNER (candidate) == QImode)
+	  {
+	    if (GET_MODE_SIZE (candidate) >= size)
+	      break;
+	    if (optab_handler (vec_duplicate_optab, candidate)
+		!= CODE_FOR_nothing)
+	      result = candidate;
+	  }
+
+      if (result != NARROWEST_INT_MODE)
+	return result;
+    }
+
   opt_scalar_int_mode tmode;
   FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT)
     if (GET_MODE_SIZE (tmode.require ()) < size)
@@ -813,12 +875,29 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
		  unsigned int max_size, by_pieces_operation op)
 {
   unsigned HOST_WIDE_INT n_insns = 0;
+  fixed_size_mode mode;
+
+  if (targetm.overlap_op_by_pieces_p () && op != COMPARE_BY_PIECES)
+    {
+      /* NB: Round up L and ALIGN to the widest integer mode for
+	 MAX_SIZE.  */
+      mode = widest_fixed_size_mode_for_size (max_size,
					      op == SET_BY_PIECES);
+      if (optab_handler (mov_optab, mode) != CODE_FOR_nothing)
+	{
+	  unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode));
+	  if (up > l)
+	    l = up;
+	  align = GET_MODE_ALIGNMENT (mode);
+	}
+    }
 
   align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
 
   while (max_size > 1 && l > 0)
     {
-      scalar_int_mode mode = widest_int_mode_for_size (max_size);
+      mode = widest_fixed_size_mode_for_size (max_size,
					      op == SET_BY_PIECES);
       enum insn_code icode;
 
       unsigned int modesize = GET_MODE_SIZE (mode);
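To see what the piece-splitting above amounts to, here is a minimal C model of the insn counting done by by_pieces_ninsns: split LEN bytes into power-of-two pieces no larger than the maximum piece size, widest first. The helper name ninsns is hypothetical; the real code additionally consults alignment, the target's available modes, and (with this patch) QI vector modes and the overlap hook.

#include <stdio.h>

/* Toy model: one operation per whole piece, widest pieces first.  */
static unsigned
ninsns (unsigned long len, unsigned max_pieces)
{
  unsigned n = 0;
  for (unsigned size = max_pieces; size >= 1; size /= 2)
    {
      n += len / size;   /* whole pieces of this size */
      len %= size;       /* remainder handled by narrower pieces */
    }
  return n;
}

int
main (void)
{
  /* 13 bytes with 8-byte max pieces: one 8-byte, one 4-byte, one
     1-byte operation.  */
  printf ("%u\n", ninsns (13, 8));   /* prints 3 */
  return 0;
}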
@@ -886,7 +965,7 @@ class pieces_addr
   void *m_cfndata;
 public:
   pieces_addr (rtx, bool, by_pieces_constfn, void *);
-  rtx adjust (scalar_int_mode, HOST_WIDE_INT);
+  rtx adjust (fixed_size_mode, HOST_WIDE_INT, by_pieces_prev * = nullptr);
   void increment_address (HOST_WIDE_INT);
   void maybe_predec (HOST_WIDE_INT);
   void maybe_postinc (HOST_WIDE_INT);
@@ -988,10 +1067,12 @@ pieces_addr::decide_autoinc (machine_mode ARG_UNUSED (mode), bool reverse,
    but we still modify the MEM's properties.  */
 
 rtx
-pieces_addr::adjust (scalar_int_mode mode, HOST_WIDE_INT offset)
+pieces_addr::adjust (fixed_size_mode mode, HOST_WIDE_INT offset,
		     by_pieces_prev *prev)
 {
   if (m_constfn)
-    return m_constfn (m_cfndata, offset, mode);
+    /* Pass the previous data to m_constfn.  */
+    return m_constfn (m_cfndata, prev, offset, mode);
   if (m_obj == NULL_RTX)
     return NULL_RTX;
   if (m_auto)
@@ -1039,13 +1120,25 @@ pieces_addr::maybe_postinc (HOST_WIDE_INT size)
 
 class op_by_pieces_d
 {
+ private:
+  fixed_size_mode get_usable_mode (fixed_size_mode, unsigned int);
+  fixed_size_mode smallest_fixed_size_mode_for_size (unsigned int);
+
  protected:
   pieces_addr m_to, m_from;
-  unsigned HOST_WIDE_INT m_len;
+  /* Make m_len read-only so that smallest_fixed_size_mode_for_size can
+     use it to check the valid mode size.  */
+  const unsigned HOST_WIDE_INT m_len;
   HOST_WIDE_INT m_offset;
   unsigned int m_align;
   unsigned int m_max_size;
   bool m_reverse;
+  /* True if this is a stack push.  */
+  bool m_push;
+  /* True if targetm.overlap_op_by_pieces_p () returns true.  */
+  bool m_overlap_op_by_pieces;
+  /* True if QI vector mode can be used.  */
+  bool m_qi_vector_mode;
 
   /* Virtual functions, overridden by derived classes for the specific
     operation.  */
@@ -1056,8 +1149,9 @@ class op_by_pieces_d
   }
 
 public:
-  op_by_pieces_d (rtx, bool, rtx, bool, by_pieces_constfn, void *,
-		  unsigned HOST_WIDE_INT, unsigned int);
+  op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn,
+		  void *, unsigned HOST_WIDE_INT, unsigned int, bool,
+		  bool = false);
   void run ();
 };
 
@@ -1065,17 +1159,21 @@ class op_by_pieces_d
    objects named TO and FROM, which are identified as loads or stores
    by TO_LOAD and FROM_LOAD.  If FROM is a load, the optional FROM_CFN
    and its associated FROM_CFN_DATA can be used to replace loads with
-   constant values.  LEN describes the length of the operation.  */
+   constant values.  MAX_PIECES describes the maximum number of bytes
+   at a time which can be moved efficiently.  LEN describes the length
+   of the operation.  */
 
-op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
-				rtx from, bool from_load,
+op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to,
+				bool to_load, rtx from, bool from_load,
				by_pieces_constfn from_cfn,
				void *from_cfn_data,
				unsigned HOST_WIDE_INT len,
-				unsigned int align)
+				unsigned int align, bool push,
+				bool qi_vector_mode)
   : m_to (to, to_load, NULL, NULL),
     m_from (from, from_load, from_cfn, from_cfn_data),
-    m_len (len), m_max_size (MOVE_MAX_PIECES + 1)
+    m_len (len), m_max_size (max_pieces + 1),
+    m_push (push), m_qi_vector_mode (qi_vector_mode)
 {
   int toi = m_to.get_addr_inc ();
   int fromi = m_from.get_addr_inc ();
@@ -1096,7 +1194,9 @@ op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
   if (by_pieces_ninsns (len, align, m_max_size, MOVE_BY_PIECES) > 2)
     {
       /* Find the mode of the largest comparison.  */
-      scalar_int_mode mode = widest_int_mode_for_size (m_max_size);
+      fixed_size_mode mode
	= widest_fixed_size_mode_for_size (m_max_size,
					   m_qi_vector_mode);
 
       m_from.decide_autoinc (mode, m_reverse, len);
       m_to.decide_autoinc (mode, m_reverse, len);
@@ -1104,6 +1204,56 @@ op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
   align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
   m_align = align;
+
+  m_overlap_op_by_pieces = targetm.overlap_op_by_pieces_p ();
+}
+
+/* This function returns the largest usable integer mode for LEN bytes
+   whose size is no bigger than size of MODE.  */
+
+fixed_size_mode
+op_by_pieces_d::get_usable_mode (fixed_size_mode mode, unsigned int len)
+{
+  unsigned int size;
+  do
+    {
+      size = GET_MODE_SIZE (mode);
+      if (len >= size && prepare_mode (mode, m_align))
+	break;
+      /* widest_fixed_size_mode_for_size checks SIZE > 1.  */
+      mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode);
+    }
+  while (1);
+  return mode;
+}
+
+/* Return the smallest integer or QI vector mode that is not narrower
+   than SIZE bytes.  */
+
+fixed_size_mode
+op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size)
+{
+  /* Use QI vector only for > size of WORD.  */
+  if (m_qi_vector_mode && size > UNITS_PER_WORD)
+    {
+      machine_mode mode;
+      fixed_size_mode candidate;
+      FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
+	if (is_a<fixed_size_mode> (mode, &candidate)
+	    && GET_MODE_INNER (candidate) == QImode)
+	  {
+	    /* Don't return a mode wider than M_LEN.  */
+	    if (GET_MODE_SIZE (candidate) > m_len)
+	      break;
+
+	    if (GET_MODE_SIZE (candidate) >= size
+		&& (optab_handler (vec_duplicate_optab, candidate)
+		    != CODE_FOR_nothing))
+	      return candidate;
+	  }
+    }
+
+  return smallest_int_mode_for_size (size * BITS_PER_UNIT);
 }
 
 /* This function contains the main loop used for expanding a block
@@ -1114,50 +1264,98 @@ op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
 
 void
 op_by_pieces_d::run ()
 {
-  while (m_max_size > 1 && m_len > 0)
+  if (m_len == 0)
+    return;
+
+  unsigned HOST_WIDE_INT length = m_len;
+
+  /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1.  */
+  fixed_size_mode mode
+    = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode);
+  mode = get_usable_mode (mode, length);
+
+  by_pieces_prev to_prev = { nullptr, mode };
+  by_pieces_prev from_prev = { nullptr, mode };
+
+  do
     {
-      scalar_int_mode mode = widest_int_mode_for_size (m_max_size);
+      unsigned int size = GET_MODE_SIZE (mode);
+      rtx to1 = NULL_RTX, from1;
 
-      if (prepare_mode (mode, m_align))
+      while (length >= size)
	{
-	  unsigned int size = GET_MODE_SIZE (mode);
-	  rtx to1 = NULL_RTX, from1;
+	  if (m_reverse)
+	    m_offset -= size;
 
-	  while (m_len >= size)
-	    {
-	      if (m_reverse)
-		m_offset -= size;
+	  to1 = m_to.adjust (mode, m_offset, &to_prev);
+	  to_prev.data = to1;
+	  to_prev.mode = mode;
+	  from1 = m_from.adjust (mode, m_offset, &from_prev);
+	  from_prev.data = from1;
+	  from_prev.mode = mode;
 
-	      to1 = m_to.adjust (mode, m_offset);
-	      from1 = m_from.adjust (mode, m_offset);
+	  m_to.maybe_predec (-(HOST_WIDE_INT)size);
+	  m_from.maybe_predec (-(HOST_WIDE_INT)size);
 
-	      m_to.maybe_predec (-(HOST_WIDE_INT)size);
-	      m_from.maybe_predec (-(HOST_WIDE_INT)size);
+	  generate (to1, from1, mode);
 
-	      generate (to1, from1, mode);
+	  m_to.maybe_postinc (size);
+	  m_from.maybe_postinc (size);
 
-	      m_to.maybe_postinc (size);
-	      m_from.maybe_postinc (size);
+	  if (!m_reverse)
+	    m_offset += size;
 
-	      if (!m_reverse)
-		m_offset += size;
+	  length -= size;
+	}
 
-	      m_len -= size;
-	    }
+      finish_mode (mode);
 
-	  finish_mode (mode);
-	}
+      if (length == 0)
	return;
 
-      m_max_size = GET_MODE_SIZE (mode);
+      if (!m_push && m_overlap_op_by_pieces)
+	{
+	  /* NB: Generate overlapping operations if it is not a stack
+	     push since stack push must not overlap.  Get the smallest
+	     fixed size mode for M_LEN bytes.  */
+	  mode = smallest_fixed_size_mode_for_size (length);
+	  mode = get_usable_mode (mode, GET_MODE_SIZE (mode));
+	  int gap = GET_MODE_SIZE (mode) - length;
+	  if (gap > 0)
+	    {
+	      /* If size of MODE > M_LEN, generate the last operation
+		 in MODE for the remaining bytes with overlapping memory
+		 from the previous operation.  */
+	      if (m_reverse)
+		m_offset += gap;
+	      else
+		m_offset -= gap;
+	      length += gap;
+	    }
+	}
+      else
+	{
+	  /* widest_fixed_size_mode_for_size checks SIZE > 1.  */
+	  mode = widest_fixed_size_mode_for_size (size,
						  m_qi_vector_mode);
	  mode = get_usable_mode (mode, length);
+	}
    }
+  while (1);
 
   /* The code above should have handled everything.  */
-  gcc_assert (!m_len);
+  gcc_assert (!length);
 }
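The overlap branch in run () above replaces the classic 8+4+1 tail splitting with one wider operation that partially overwrites bytes already stored, which is safe for moves and sets (but not stack pushes, hence the m_push guard). A minimal C sketch of the idea, with memcpy standing in for the piecewise move insns and the helper name set13 being hypothetical:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Fill 13 bytes with two 8-byte stores: bytes 0-7, then bytes 5-12.
   The second store overlaps bytes 5-7 already written.  */
static void
set13 (unsigned char *dst, uint64_t pattern)
{
  memcpy (dst, &pattern, 8);            /* bytes 0-7 */
  memcpy (dst + 13 - 8, &pattern, 8);   /* bytes 5-12, overlapping */
}

int
main (void)
{
  unsigned char buf[13];
  uint64_t pat;
  memset (&pat, 0xab, sizeof pat);
  set13 (buf, pat);
  for (int i = 0; i < 13; i++)
    printf ("%02x", buf[i]);            /* prints ab thirteen times */
  printf ("\n");
  return 0;
}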
 /* Derived class from op_by_pieces_d, providing support for block move
    operations.  */
 
+#ifdef PUSH_ROUNDING
+#define PUSHG_P(to)  ((to) == nullptr)
+#else
+#define PUSHG_P(to)  false
+#endif
+
 class move_by_pieces_d : public op_by_pieces_d
 {
   insn_gen_fn m_gen_fun;
@@ -1167,7 +1365,8 @@ class move_by_pieces_d : public op_by_pieces_d
  public:
   move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len,
		    unsigned int align)
-    : op_by_pieces_d (to, false, from, true, NULL, NULL, len, align)
+    : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL,
+		      NULL, len, align, PUSHG_P (to))
   {
   }
   rtx finish_retmode (memop_ret);
@@ -1261,8 +1460,10 @@ class store_by_pieces_d : public op_by_pieces_d
  public:
   store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data,
-		     unsigned HOST_WIDE_INT len, unsigned int align)
-    : op_by_pieces_d (to, false, NULL_RTX, true, cfn, cfn_data, len, align)
+		     unsigned HOST_WIDE_INT len, unsigned int align,
+		     bool qi_vector_mode)
+    : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn,
+		      cfn_data, len, align, false, qi_vector_mode)
   {
   }
   rtx finish_retmode (memop_ret);
@@ -1317,7 +1518,7 @@ store_by_pieces_d::finish_retmode (memop_ret retmode)
 
 int
 can_store_by_pieces (unsigned HOST_WIDE_INT len,
-		     rtx (*constfun) (void *, HOST_WIDE_INT, scalar_int_mode),
+		     by_pieces_constfn constfun,
		     void *constfundata, unsigned int align, bool memsetp)
 {
   unsigned HOST_WIDE_INT l;
@@ -1351,7 +1552,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
       max_size = STORE_MAX_PIECES + 1;
       while (max_size > 1 && l > 0)
	{
-	  scalar_int_mode mode = widest_int_mode_for_size (max_size);
+	  fixed_size_mode mode
+	    = widest_fixed_size_mode_for_size (max_size, memsetp);
 
	  icode = optab_handler (mov_optab, mode);
	  if (icode != CODE_FOR_nothing
@@ -1364,8 +1566,12 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
	      if (reverse)
		offset -= size;
 
-	      cst = (*constfun) (constfundata, offset, mode);
-	      if (!targetm.legitimate_constant_p (mode, cst))
+	      cst = (*constfun) (constfundata, nullptr, offset, mode);
+	      /* All CONST_VECTORs can be loaded for memset since
+		 vec_duplicate_optab is a precondition to pick a
+		 vector mode for the memset expander.  */
+	      if (!((memsetp && VECTOR_MODE_P (mode))
+		    || targetm.legitimate_constant_p (mode, cst)))
		return 0;
 
	      if (!reverse)
@@ -1394,7 +1600,7 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
 
 rtx
 store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
-		 rtx (*constfun) (void *, HOST_WIDE_INT, scalar_int_mode),
+		 by_pieces_constfn constfun,
		 void *constfundata, unsigned int align, bool memsetp,
		 memop_ret retmode)
 {
@@ -1409,7 +1615,8 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
			   memsetp ? SET_BY_PIECES : STORE_BY_PIECES,
			   optimize_insn_for_speed_p ()));
 
-  store_by_pieces_d data (to, constfun, constfundata, len, align);
+  store_by_pieces_d data (to, constfun, constfundata, len, align,
			  memsetp);
   data.run ();
 
   if (retmode != RETURN_BEGIN)
@@ -1418,15 +1625,6 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
   return to;
 }
 
-/* Callback routine for clear_by_pieces.
-   Return const0_rtx unconditionally.  */
-
-static rtx
-clear_by_pieces_1 (void *, HOST_WIDE_INT, scalar_int_mode)
-{
-  return const0_rtx;
-}
-
 /* Generate several move instructions to clear LEN bytes of block TO.
    (A MEM rtx with BLKmode).  ALIGN is maximum alignment we can assume.  */
 
@@ -1436,7 +1634,10 @@ clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align)
   if (len == 0)
     return;
 
-  store_by_pieces_d data (to, clear_by_pieces_1, NULL, len, align);
+  /* Use builtin_memset_read_str to support vector mode broadcast.  */
+  char c = 0;
+  store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align,
			  true);
  data.run ();
 }
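For integer piece modes, the memset constant callback has to replicate the fill byte across the whole piece; for the new vector modes, vec_duplicate does the same job in one insn. A rough C illustration of the integer-mode case (the helper name broadcast_byte is hypothetical, not the GCC callback):

#include <stdint.h>
#include <stdio.h>

/* Replicate byte C into every byte of a 64-bit piece, the usual
   multiply-by-0x0101...01 trick.  */
static uint64_t
broadcast_byte (unsigned char c)
{
  return (uint64_t) c * 0x0101010101010101ULL;
}

int
main (void)
{
  printf ("%016llx\n", (unsigned long long) broadcast_byte (0xab));
  return 0;   /* prints abababababababab */
}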
@@ -1458,7 +1659,8 @@ class compare_by_pieces_d : public op_by_pieces_d
   compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn,
		       void *op1_cfn_data, HOST_WIDE_INT len, int align,
		       rtx_code_label *fail_label)
-    : op_by_pieces_d (op0, true, op1, true, op1_cfn, op1_cfn_data, len, align)
+    : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, op1_cfn,
+		      op1_cfn_data, len, align, false)
   {
     m_fail_label = fail_label;
   }
@@ -1727,7 +1929,7 @@ block_move_libcall_safe_for_call_parm (void)
   tree fn;
 
   /* If arguments are pushed on the stack, then they're safe.  */
-  if (PUSH_ARGS)
+  if (targetm.calls.push_argument (0))
     return true;
 
   /* If registers go on the stack anyway, any argument is sure to clobber
@@ -3054,7 +3256,8 @@ clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
		     unsigned int expected_align, HOST_WIDE_INT expected_size,
		     unsigned HOST_WIDE_INT min_size,
		     unsigned HOST_WIDE_INT max_size,
-		     unsigned HOST_WIDE_INT probable_max_size)
+		     unsigned HOST_WIDE_INT probable_max_size,
+		     unsigned ctz_size)
 {
   machine_mode mode = GET_MODE (object);
   unsigned int align;
@@ -3101,6 +3304,10 @@ clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
				   expected_align, expected_size,
				   min_size, max_size, probable_max_size))
     ;
+  else if (try_store_by_multiple_pieces (object, size, ctz_size,
					 min_size, max_size,
					 NULL_RTX, 0, align))
+    ;
   else if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (object)))
     return set_storage_via_libcall (object, size, const0_rtx,
				    method == BLOCK_OP_TAILCALL);
@@ -3118,7 +3325,7 @@ clear_storage (rtx object, rtx size, enum block_op_methods method)
     min = max = UINTVAL (size);
   else
     max = GET_MODE_MASK (GET_MODE (size));
-  return clear_storage_hints (object, size, method, 0, -1, min, max, max);
+  return clear_storage_hints (object, size, method, 0, -1, min, max, max, 0);
 }
 
@@ -4538,11 +4745,19 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
       skip = (reg_parm_stack_space == 0) ? 0 : used;
 
 #ifdef PUSH_ROUNDING
+      /* NB: Let the backend know the number of bytes to push and
+	 decide if push insns should be generated.  */
+      unsigned int push_size;
+      if (CONST_INT_P (size))
+	push_size = INTVAL (size);
+      else
+	push_size = 0;
+
       /* Do it with several push insns if that doesn't take lots of insns
	 and if there is no difficulty with push insns that skip bytes
	 on the stack for alignment purposes.  */
       if (args_addr == 0
-	  && PUSH_ARGS
+	  && targetm.calls.push_argument (push_size)
	  && CONST_INT_P (size)
	  && skip == 0
	  && MEM_ALIGN (xinner) >= align
@@ -4747,7 +4962,7 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
     anti_adjust_stack (gen_int_mode (extra, Pmode));
 
 #ifdef PUSH_ROUNDING
-  if (args_addr == 0 && PUSH_ARGS)
+  if (args_addr == 0 && targetm.calls.push_argument (0))
     emit_single_push_insn (mode, x, type);
   else
 #endif
@@ -5457,6 +5672,7 @@ expand_assignment (tree to, tree from, bool nontemporal)
   /* If to_rtx is a promoted subreg, we need to zero or sign
      extend the value afterwards.  */
   if (TREE_CODE (to) == MEM_REF
+      && TYPE_MODE (TREE_TYPE (from)) != BLKmode
       && !REF_REVERSE_STORAGE_ORDER (to)
       && known_eq (bitpos, 0)
       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (to_rtx))))
@@ -5643,7 +5859,8 @@ emit_storent_insn (rtx to, rtx from)
 
 /* Helper function for store_expr storing of STRING_CST.  */
 static rtx
-string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode mode)
+string_cst_read_str (void *data, void *, HOST_WIDE_INT offset,
		     fixed_size_mode mode)
 {
   tree str = (tree) data;
 
@@ -5658,10 +5875,13 @@ string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode mode)
       size_t l = TREE_STRING_LENGTH (str) - offset;
       memcpy (p, TREE_STRING_POINTER (str) + offset, l);
       memset (p + l, '\0', GET_MODE_SIZE (mode) - l);
-      return c_readstr (p, mode, false);
+      return c_readstr (p, as_a <scalar_int_mode> (mode), false);
     }
 
-  return c_readstr (TREE_STRING_POINTER (str) + offset, mode, false);
+  /* The by-pieces infrastructure does not try to pick a vector mode
+     for storing STRING_CST.  */
+  return c_readstr (TREE_STRING_POINTER (str) + offset,
		    as_a <scalar_int_mode> (mode), false);
 }
 
 /* Generate code for computing expression EXP,
@@ -6967,7 +7187,8 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
	  && eltmode == GET_MODE_INNER (mode)
	  && ((icode = optab_handler (vec_duplicate_optab, mode))
	      != CODE_FOR_nothing)
-	  && (elt = uniform_vector_p (exp)))
+	  && (elt = uniform_vector_p (exp))
+	  && !VECTOR_TYPE_P (TREE_TYPE (elt)))
	{
	  class expand_operand ops[2];
	  create_output_operand (&ops[0], target, mode);
@@ -7016,7 +7237,9 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
	    /* Compute the size of the elements in the CTOR.  It differs
	       from the size of the vector type elements only when the
	       CTOR elements are vectors themselves.  */
-	    tree val_type = TREE_TYPE (CONSTRUCTOR_ELT (exp, 0)->value);
+	    tree val_type = (CONSTRUCTOR_NELTS (exp) != 0
			     ? TREE_TYPE (CONSTRUCTOR_ELT (exp, 0)->value)
			     : elttype);
	    if (VECTOR_TYPE_P (val_type))
	      bitsize = tree_to_uhwi (TYPE_SIZE (val_type));
	    else
@@ -7211,7 +7434,13 @@ store_field (rtx target, poly_int64 bitsize, poly_int64 bitpos,
	      || !multiple_p (bitpos, BITS_PER_UNIT)
	      || !poly_int_tree_p (DECL_SIZE (TREE_OPERAND (exp, 1)),
				   &decl_bitsize)
-	      || maybe_ne (decl_bitsize, bitsize)))
+	      || maybe_ne (decl_bitsize, bitsize))
+	  /* A call with an addressable return type and return-slot
+	     optimization must not need bitfield operations but we must
+	     pass down the original target.  */
+	  && (TREE_CODE (exp) != CALL_EXPR
+	      || !TREE_ADDRESSABLE (TREE_TYPE (exp))
+	      || !CALL_EXPR_RETURN_SLOT_OPT (exp)))
	  /* If we are expanding a MEM_REF of a non-BLKmode non-addressable
	     decl we must use bitfield operations.  */
	  || (known_size_p (bitsize)
@@ -8195,7 +8424,7 @@ expand_expr_addr_expr_1 (tree exp, rtx target, scalar_int_mode tmode,
	 array with address of COMPOUND_LITERAL_EXPR in DECL_INITIAL;
	 the initializers aren't gimplified.  */
      if (COMPOUND_LITERAL_EXPR_DECL (exp)
-	  && TREE_STATIC (COMPOUND_LITERAL_EXPR_DECL (exp)))
+	  && is_global_var (COMPOUND_LITERAL_EXPR_DECL (exp)))
	return expand_expr_addr_expr_1 (COMPOUND_LITERAL_EXPR_DECL (exp),
					target, tmode, modifier, as);
      /* FALLTHRU */
@@ -8412,6 +8641,19 @@ expand_constructor (tree exp, rtx target, enum expand_modifier modifier,
      return constructor;
    }
 
+  /* If the CTOR is available in static storage and not mostly
+     zeros and we can move it by pieces prefer to do so since
+     that's usually more efficient than performing a series of
+     stores from immediates.  */
+  if (avoid_temp_mem
+      && TREE_STATIC (exp)
+      && TREE_CONSTANT (exp)
+      && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type))
+      && can_move_by_pieces (tree_to_uhwi (TYPE_SIZE_UNIT (type)),
			     TYPE_ALIGN (type))
+      && ! mostly_zeros_p (exp))
+    return NULL_RTX;
+
   /* Handle calls that pass values in multiple non-contiguous
      locations.  The Irix 6 ABI has examples of this.  */
   if (target == 0 || ! safe_from_p (target, exp, 1)
@@ -8653,6 +8895,56 @@ expand_misaligned_mem_ref (rtx temp, machine_mode mode, int unsignedp,
   return temp;
 }
 
+/* Helper function of expand_expr_2, expand a division or modulo.
+   op0 and op1 should be already expanded treeop0 and treeop1, using
+   expand_operands.  */
+
+static rtx
+expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0,
		    tree treeop1, rtx op0, rtx op1, rtx target, int unsignedp)
+{
+  bool mod_p = (code == TRUNC_MOD_EXPR || code == FLOOR_MOD_EXPR
		|| code == CEIL_MOD_EXPR || code == ROUND_MOD_EXPR);
+  if (SCALAR_INT_MODE_P (mode)
+      && optimize >= 2
+      && get_range_pos_neg (treeop0) == 1
+      && get_range_pos_neg (treeop1) == 1)
+    {
+      /* If both arguments are known to be positive when interpreted
+	 as signed, we can expand it as both signed and unsigned
+	 division or modulo.  Choose the cheaper sequence in that case.  */
+      bool speed_p = optimize_insn_for_speed_p ();
+      do_pending_stack_adjust ();
+      start_sequence ();
+      rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1);
+      rtx_insn *uns_insns = get_insns ();
+      end_sequence ();
+      start_sequence ();
+      rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0);
+      rtx_insn *sgn_insns = get_insns ();
+      end_sequence ();
+      unsigned uns_cost = seq_cost (uns_insns, speed_p);
+      unsigned sgn_cost = seq_cost (sgn_insns, speed_p);
+
+      /* If costs are the same then use as tie breaker the other
+	 factor.  */
+      if (uns_cost == sgn_cost)
+	{
+	  uns_cost = seq_cost (uns_insns, !speed_p);
+	  sgn_cost = seq_cost (sgn_insns, !speed_p);
+	}
+
+      if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
+	{
+	  emit_insn (uns_insns);
+	  return uns_ret;
+	}
+      emit_insn (sgn_insns);
+      return sgn_ret;
+    }
+  return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
+}
+
 rtx
 expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
		    enum expand_modifier modifier)
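The key fact expand_expr_divmod relies on: when both operands are known non-negative, signed and unsigned division (and modulo) compute the same value, so the expander can emit whichever sequence is cheaper. A minimal C check of that equivalence:

#include <assert.h>

int
main (void)
{
  int x = 1000, y = 7;
  /* Both operands non-negative: the two expansions agree.  */
  assert (x / y == (int) ((unsigned) x / (unsigned) y));
  assert (x % y == (int) ((unsigned) x % (unsigned) y));
  return 0;
}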
@@ -9190,14 +9482,78 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
	  if (!REG_P (op0))
	    op0 = copy_to_mode_reg (mode, op0);
 
-	  return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
-			       gen_int_mode (tree_to_shwi (exp1),
					     TYPE_MODE (TREE_TYPE (exp1)))));
+	  op1 = gen_int_mode (tree_to_shwi (exp1),
			      TYPE_MODE (TREE_TYPE (exp1)));
+	  return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0, op1));
	}
 
      if (modifier == EXPAND_STACK_PARM)
	target = 0;
 
+      if (SCALAR_INT_MODE_P (mode) && optimize >= 2)
+	{
+	  gimple *def_stmt0 = get_def_for_expr (treeop0, TRUNC_DIV_EXPR);
+	  gimple *def_stmt1 = get_def_for_expr (treeop1, TRUNC_DIV_EXPR);
+	  if (def_stmt0
+	      && !operand_equal_p (treeop1, gimple_assign_rhs2 (def_stmt0), 0))
+	    def_stmt0 = NULL;
+	  if (def_stmt1
+	      && !operand_equal_p (treeop0, gimple_assign_rhs2 (def_stmt1), 0))
+	    def_stmt1 = NULL;
+
+	  if (def_stmt0 || def_stmt1)
+	    {
+	      /* X / Y * Y can be expanded as X - X % Y too.
		 Choose the cheaper sequence of those two.  */
+	      if (def_stmt0)
+		treeop0 = gimple_assign_rhs1 (def_stmt0);
+	      else
+		{
+		  treeop1 = treeop0;
+		  treeop0 = gimple_assign_rhs1 (def_stmt1);
+		}
+	      expand_operands (treeop0, treeop1, subtarget, &op0, &op1,
			       EXPAND_NORMAL);
+	      bool speed_p = optimize_insn_for_speed_p ();
+	      do_pending_stack_adjust ();
+	      start_sequence ();
+	      rtx divmul_ret
		= expand_expr_divmod (TRUNC_DIV_EXPR, mode, treeop0, treeop1,
				      op0, op1, NULL_RTX, unsignedp);
+	      divmul_ret = expand_mult (mode, divmul_ret, op1, target,
					unsignedp);
+	      rtx_insn *divmul_insns = get_insns ();
+	      end_sequence ();
+	      start_sequence ();
+	      rtx modsub_ret
		= expand_expr_divmod (TRUNC_MOD_EXPR, mode, treeop0, treeop1,
				      op0, op1, NULL_RTX, unsignedp);
+	      this_optab = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
+	      modsub_ret = expand_binop (mode, this_optab, op0, modsub_ret,
					 target, unsignedp, OPTAB_LIB_WIDEN);
+	      rtx_insn *modsub_insns = get_insns ();
+	      end_sequence ();
+	      unsigned divmul_cost = seq_cost (divmul_insns, speed_p);
+	      unsigned modsub_cost = seq_cost (modsub_insns, speed_p);
+	      /* If costs are the same then use as tie breaker the other
		 factor.  */
+	      if (divmul_cost == modsub_cost)
+		{
+		  divmul_cost = seq_cost (divmul_insns, !speed_p);
+		  modsub_cost = seq_cost (modsub_insns, !speed_p);
+		}
+
+	      if (divmul_cost <= modsub_cost)
+		{
+		  emit_insn (divmul_insns);
+		  return REDUCE_BIT_FIELD (divmul_ret);
+		}
+	      emit_insn (modsub_insns);
+	      return REDUCE_BIT_FIELD (modsub_ret);
+	    }
+	}
+
      expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
      return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
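The rewrite above rests on the C-style truncating division identity: (x / y) * y == x - x % y, which holds by the definition of % for integer types. A minimal check:

#include <assert.h>

int
main (void)
{
  /* Truncating division: x / y * y and x - x % y always agree.  */
  for (int x = -20; x <= 20; x++)
    for (int y = 1; y <= 5; y++)
      assert (x / y * y == x - x % y);
  return 0;
}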
@@ -9211,61 +9567,21 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
    case CEIL_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
-      {
-	/* If this is a fixed-point operation, then we cannot use the code
-	   below because "expand_divmod" doesn't support sat/no-sat fixed-point
-	   divisions.  */
-	if (ALL_FIXED_POINT_MODE_P (mode))
-	  goto binop;
-
-	if (modifier == EXPAND_STACK_PARM)
-	  target = 0;
-	/* Possible optimization: compute the dividend with EXPAND_SUM
-	   then if the divisor is constant can optimize the case
-	   where some terms of the dividend have coeffs divisible by it.  */
-	expand_operands (treeop0, treeop1,
			 subtarget, &op0, &op1, EXPAND_NORMAL);
-	bool mod_p = code == TRUNC_MOD_EXPR || code == FLOOR_MOD_EXPR
		     || code == CEIL_MOD_EXPR || code == ROUND_MOD_EXPR;
-	if (SCALAR_INT_MODE_P (mode)
-	    && optimize >= 2
-	    && get_range_pos_neg (treeop0) == 1
-	    && get_range_pos_neg (treeop1) == 1)
-	  {
-	    /* If both arguments are known to be positive when interpreted
-	       as signed, we can expand it as both signed and unsigned
-	       division or modulo.  Choose the cheaper sequence in that case.  */
-	    bool speed_p = optimize_insn_for_speed_p ();
-	    do_pending_stack_adjust ();
-	    start_sequence ();
-	    rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1);
-	    rtx_insn *uns_insns = get_insns ();
-	    end_sequence ();
-	    start_sequence ();
-	    rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0);
-	    rtx_insn *sgn_insns = get_insns ();
-	    end_sequence ();
-	    unsigned uns_cost = seq_cost (uns_insns, speed_p);
-	    unsigned sgn_cost = seq_cost (sgn_insns, speed_p);
-
-	    /* If costs are the same then use as tie breaker the other
-	       other factor.  */
-	    if (uns_cost == sgn_cost)
-	      {
-		uns_cost = seq_cost (uns_insns, !speed_p);
-		sgn_cost = seq_cost (sgn_insns, !speed_p);
-	      }
-
-	    if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
-	      {
-		emit_insn (uns_insns);
-		return uns_ret;
-	      }
-	    emit_insn (sgn_insns);
-	    return sgn_ret;
-	  }
-	return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
-      }
+      /* If this is a fixed-point operation, then we cannot use the code
+	 below because "expand_divmod" doesn't support sat/no-sat fixed-point
+	 divisions.  */
+      if (ALL_FIXED_POINT_MODE_P (mode))
+	goto binop;
+
+      if (modifier == EXPAND_STACK_PARM)
+	target = 0;
+      /* Possible optimization: compute the dividend with EXPAND_SUM
+	 then if the divisor is constant can optimize the case
+	 where some terms of the dividend have coeffs divisible by it.  */
+      expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
+      return expand_expr_divmod (code, mode, treeop0, treeop1, op0, op1,
				 target, unsignedp);
 
    case RDIV_EXPR:
      goto binop;
 
@@ -9800,6 +10116,7 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
    case VEC_PACK_SAT_EXPR:
    case VEC_PACK_FIX_TRUNC_EXPR:
      mode = TYPE_MODE (TREE_TYPE (treeop0));
+      subtarget = NULL_RTX;
      goto binop;
 
    case VEC_PACK_TRUNC_EXPR:
@@ -9823,6 +10140,7 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
	  return eops[0].value;
	}
      mode = TYPE_MODE (TREE_TYPE (treeop0));
+      subtarget = NULL_RTX;
      goto binop;
 
    case VEC_PACK_FLOAT_EXPR:
@@ -10221,8 +10539,19 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
	exp = SSA_NAME_VAR (ssa_name);
      goto expand_decl_rtl;
 
-    case PARM_DECL:
    case VAR_DECL:
+      /* Allow accel compiler to handle variables that require special
+	 treatment, e.g. if they have been modified in some way earlier in
+	 compilation by the adjust_private_decl OpenACC hook.  */
+      if (flag_openacc && targetm.goacc.expand_var_decl)
+	{
+	  temp = targetm.goacc.expand_var_decl (exp);
+	  if (temp)
+	    return temp;
+	}
+      /* ... fall through ... */
+
+    case PARM_DECL:
      /* If a static var's type was incomplete when the decl was written,
	 but the type is complete now, lay out the decl now.  */
      if (DECL_SIZE (exp) == 0
@@ -10764,61 +11093,8 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
      goto normal_inner_ref;
 
    case COMPONENT_REF:
-      /* If the operand is a CONSTRUCTOR, we can just extract the
-	 appropriate field if it is present.  */
-      if (TREE_CODE (treeop0) == CONSTRUCTOR)
-	{
-	  unsigned HOST_WIDE_INT idx;
-	  tree field, value;
-	  scalar_int_mode field_mode;
-
-	  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (treeop0),
				    idx, field, value)
-	    if (field == treeop1
		/* We can normally use the value of the field in the
		   CONSTRUCTOR.  However, if this is a bitfield in
		   an integral mode that we can fit in a HOST_WIDE_INT,
		   we must mask only the number of bits in the bitfield,
		   since this is done implicitly by the constructor.  If
		   the bitfield does not meet either of those conditions,
		   we can't do this optimization.  */
		&& (! DECL_BIT_FIELD (field)
		    || (is_int_mode (DECL_MODE (field), &field_mode)
			&& (GET_MODE_PRECISION (field_mode)
			    <= HOST_BITS_PER_WIDE_INT))))
-	      {
-		if (DECL_BIT_FIELD (field)
		    && modifier == EXPAND_STACK_PARM)
-		  target = 0;
-		op0 = expand_expr (value, target, tmode, modifier);
-		if (DECL_BIT_FIELD (field))
-		  {
-		    HOST_WIDE_INT bitsize = TREE_INT_CST_LOW (DECL_SIZE (field));
-		    scalar_int_mode imode
		      = SCALAR_INT_TYPE_MODE (TREE_TYPE (field));
-
-		    if (TYPE_UNSIGNED (TREE_TYPE (field)))
-		      {
-			op1 = gen_int_mode ((HOST_WIDE_INT_1 << bitsize) - 1,
					    imode);
-			op0 = expand_and (imode, op0, op1, target);
-		      }
-		    else
-		      {
-			int count = GET_MODE_PRECISION (imode) - bitsize;
-
-			op0 = expand_shift (LSHIFT_EXPR, imode, op0, count,
					    target, 0);
-			op0 = expand_shift (RSHIFT_EXPR, imode, op0, count,
					    target, 0);
-		      }
-		  }
-
-		return op0;
-	      }
-	}
-      goto normal_inner_ref;
-
+      gcc_assert (TREE_CODE (treeop0) != CONSTRUCTOR);
+      /* Fall through.  */
    case BIT_FIELD_REF:
    case ARRAY_RANGE_REF:
    normal_inner_ref:
@@ -11235,7 +11511,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
	  /* All valid uses of __builtin_va_arg_pack () are removed during
	     inlining.  */
	  if (CALL_EXPR_VA_ARG_PACK (exp))
-	    error ("%Kinvalid use of %<__builtin_va_arg_pack ()%>", exp);
+	    error ("invalid use of %<__builtin_va_arg_pack ()%>");
	  {
	    tree fndecl = get_callee_fndecl (exp), attr;
 
@@ -11247,7 +11523,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
				      DECL_ATTRIBUTES (fndecl))) != NULL)
	      {
		const char *ident = lang_hooks.decl_printable_name (fndecl, 1);
-		error ("%Kcall to %qs declared with attribute error: %s", exp,
+		error ("call to %qs declared with attribute error: %s",
		       identifier_to_locale (ident),
		       TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))));
	      }
@@ -11259,10 +11535,10 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
				      DECL_ATTRIBUTES (fndecl))) != NULL)
	      {
		const char *ident = lang_hooks.decl_printable_name (fndecl, 1);
-		warning_at (tree_nonartificial_location (exp),
+		warning_at (EXPR_LOCATION (exp),
			    OPT_Wattribute_warning,
-			    "%Kcall to %qs declared with attribute warning: %s",
-			    exp, identifier_to_locale (ident),
+			    "call to %qs declared with attribute warning: %s",
+			    identifier_to_locale (ident),
			    TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))));
	      }
 
@@ -12275,6 +12551,37 @@ maybe_optimize_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
  *arg1 = c4;
  return code == EQ_EXPR ? LE_EXPR : GT_EXPR;
 }
+
+/* Optimize x - y < 0 into x < y if x - y has undefined overflow.  */
+
+void
+maybe_optimize_sub_cmp_0 (enum tree_code code, tree *arg0, tree *arg1)
+{
+  gcc_checking_assert (code == GT_EXPR || code == GE_EXPR
		       || code == LT_EXPR || code == LE_EXPR);
+  gcc_checking_assert (integer_zerop (*arg1));
+
+  if (!optimize)
+    return;
+
+  gimple *stmt = get_def_for_expr (*arg0, MINUS_EXPR);
+  if (stmt == NULL)
+    return;
+
+  tree treeop0 = gimple_assign_rhs1 (stmt);
+  tree treeop1 = gimple_assign_rhs2 (stmt);
+  if (!TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (treeop0)))
+    return;
+
+  if (issue_strict_overflow_warning (WARN_STRICT_OVERFLOW_COMPARISON))
+    warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
		"assuming signed overflow does not occur when "
		"simplifying %<X - Y %s 0%> to %<X %s Y%>",
		op_symbol_code (code), op_symbol_code (code));
+
+  *arg0 = treeop0;
+  *arg1 = treeop1;
+}
 
 /* Generate code to calculate OPS, an exploded expression
    using a store-flag instruction and return an rtx for the result.
@@ -12363,6 +12670,14 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
	}
    }
 
+  /* Optimize (x - y) < 0 into x < y if x - y has undefined overflow.  */
+  if (!unsignedp
+      && (ops->code == LT_EXPR || ops->code == LE_EXPR
	  || ops->code == GT_EXPR || ops->code == GE_EXPR)
+      && integer_zerop (arg1)
+      && TREE_CODE (arg0) == SSA_NAME)
+    maybe_optimize_sub_cmp_0 (ops->code, &arg0, &arg1);
+
   /* Get the rtx comparison code to use.  We know that EXP is a comparison
      operation of some type.  Some comparisons against 1 and -1 can be
      converted to comparisons with zero.  Do so here so that the tests
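Why maybe_optimize_sub_cmp_0 needs undefined signed overflow: under wrapping arithmetic, (x - y) < 0 and x < y can disagree. A minimal C sketch, assuming 32-bit int, that simulates the wrapping subtraction with unsigned arithmetic to show the divergence:

#include <limits.h>
#include <stdio.h>

int
main (void)
{
  int x = INT_MIN, y = 1;
  /* Wrapping x - y computed via unsigned arithmetic: INT_MIN - 1
     wraps around to INT_MAX, whose sign bit is clear.  */
  unsigned ux = (unsigned) x - (unsigned) y;
  printf ("x < y: %d\n", x < y);                    /* prints 1 */
  printf ("wrapped (x - y) < 0: %d\n",
	  (int) (ux >> 31));                        /* prints 0 */
  return 0;
}

Because signed overflow is undefined in C, the compiler may assume x - y does not wrap, which makes the fold to x < y valid (and is why the pass emits a -Wstrict-overflow note when asked).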