author     Thomas Koenig <tkoenig@gcc.gnu.org>   2021-09-13 19:49:49 +0200
committer  Thomas Koenig <tkoenig@gcc.gnu.org>   2021-09-13 19:49:49 +0200
commit     b18a97e5dd0935e1c4a626c230f21457d0aad3d5 (patch)
tree       c1818f41af6fe780deafb6cd6a183f32085fe654 /gcc/expr.c
parent     e76a53644c9d70e998c0d050e9a456af388c6b61 (diff)
Merged current trunk to branch.
Diffstat (limited to 'gcc/expr.c')
-rw-r--r--  gcc/expr.c  705
1 file changed, 510 insertions, 195 deletions
diff --git a/gcc/expr.c b/gcc/expr.c
index 33934d6..e0bcbcc 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -1,5 +1,5 @@
/* Convert tree expression to rtl instructions, for GNU compiler.
- Copyright (C) 1988-2020 Free Software Foundation, Inc.
+ Copyright (C) 1988-2021 Free Software Foundation, Inc.
This file is part of GCC.
@@ -62,6 +62,8 @@ along with GCC; see the file COPYING3. If not see
#include "ccmp.h"
#include "gimple-fold.h"
#include "rtx-vector-builder.h"
+#include "tree-pretty-print.h"
+#include "flags.h"
/* If this is nonzero, we do not bother generating VOLATILE
@@ -234,8 +236,27 @@ convert_move (rtx to, rtx from, int unsignedp)
>= GET_MODE_PRECISION (to_int_mode))
&& SUBREG_CHECK_PROMOTED_SIGN (from, unsignedp))
{
+ scalar_int_mode int_orig_mode;
+ scalar_int_mode int_inner_mode;
+ machine_mode orig_mode = GET_MODE (from);
+
from = gen_lowpart (to_int_mode, SUBREG_REG (from));
from_mode = to_int_mode;
+
+ /* Preserve SUBREG_PROMOTED_VAR_P if the new mode is wider than
+ the original mode, but narrower than the inner mode. */
+ if (GET_CODE (from) == SUBREG
+ && is_a <scalar_int_mode> (orig_mode, &int_orig_mode)
+ && GET_MODE_PRECISION (to_int_mode)
+ > GET_MODE_PRECISION (int_orig_mode)
+ && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (from)),
+ &int_inner_mode)
+ && GET_MODE_PRECISION (int_inner_mode)
+ > GET_MODE_PRECISION (to_int_mode))
+ {
+ SUBREG_PROMOTED_VAR_P (from) = 1;
+ SUBREG_PROMOTED_SET (from, unsignedp);
+ }
}
gcc_assert (GET_CODE (to) != SUBREG || !SUBREG_PROMOTED_VAR_P (to));
@@ -686,7 +707,27 @@ convert_modes (machine_mode mode, machine_mode oldmode, rtx x, int unsignedp)
&& (GET_MODE_PRECISION (subreg_promoted_mode (x))
>= GET_MODE_PRECISION (int_mode))
&& SUBREG_CHECK_PROMOTED_SIGN (x, unsignedp))
- x = gen_lowpart (int_mode, SUBREG_REG (x));
+ {
+ scalar_int_mode int_orig_mode;
+ scalar_int_mode int_inner_mode;
+ machine_mode orig_mode = GET_MODE (x);
+ x = gen_lowpart (int_mode, SUBREG_REG (x));
+
+ /* Preserve SUBREG_PROMOTED_VAR_P if the new mode is wider than
+ the original mode, but narrower than the inner mode. */
+ if (GET_CODE (x) == SUBREG
+ && is_a <scalar_int_mode> (orig_mode, &int_orig_mode)
+ && GET_MODE_PRECISION (int_mode)
+ > GET_MODE_PRECISION (int_orig_mode)
+ && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (x)),
+ &int_inner_mode)
+ && GET_MODE_PRECISION (int_inner_mode)
+ > GET_MODE_PRECISION (int_mode))
+ {
+ SUBREG_PROMOTED_VAR_P (x) = 1;
+ SUBREG_PROMOTED_SET (x, unsignedp);
+ }
+ }
if (GET_MODE (x) != VOIDmode)
oldmode = GET_MODE (x);
@@ -744,7 +785,7 @@ static unsigned int
alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
{
scalar_int_mode tmode
- = int_mode_for_size (max_pieces * BITS_PER_UNIT, 1).require ();
+ = int_mode_for_size (max_pieces * BITS_PER_UNIT, 0).require ();
if (align >= GET_MODE_ALIGNMENT (tmode))
align = GET_MODE_ALIGNMENT (tmode);
@@ -767,15 +808,36 @@ alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
return align;
}
-/* Return the widest integer mode that is narrower than SIZE bytes. */
+/* Return the widest QI vector, if QI_VECTOR is true, or integer mode
+ that is narrower than SIZE bytes. */
-static scalar_int_mode
-widest_int_mode_for_size (unsigned int size)
+static fixed_size_mode
+widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector)
{
- scalar_int_mode result = NARROWEST_INT_MODE;
+ fixed_size_mode result = NARROWEST_INT_MODE;
gcc_checking_assert (size > 1);
+ /* Use QI vector only if size is wider than a WORD. */
+ if (qi_vector && size > UNITS_PER_WORD)
+ {
+ machine_mode mode;
+ fixed_size_mode candidate;
+ FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
+ if (is_a<fixed_size_mode> (mode, &candidate)
+ && GET_MODE_INNER (candidate) == QImode)
+ {
+ if (GET_MODE_SIZE (candidate) >= size)
+ break;
+ if (optab_handler (vec_duplicate_optab, candidate)
+ != CODE_FOR_nothing)
+ result = candidate;
+ }
+
+ if (result != NARROWEST_INT_MODE)
+ return result;
+ }
+
opt_scalar_int_mode tmode;
FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT)
if (GET_MODE_SIZE (tmode.require ()) < size)
@@ -813,12 +875,29 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
unsigned int max_size, by_pieces_operation op)
{
unsigned HOST_WIDE_INT n_insns = 0;
+ fixed_size_mode mode;
+
+ if (targetm.overlap_op_by_pieces_p () && op != COMPARE_BY_PIECES)
+ {
+ /* NB: Round up L and ALIGN to the widest integer mode for
+ MAX_SIZE. */
+ mode = widest_fixed_size_mode_for_size (max_size,
+ op == SET_BY_PIECES);
+ if (optab_handler (mov_optab, mode) != CODE_FOR_nothing)
+ {
+ unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode));
+ if (up > l)
+ l = up;
+ align = GET_MODE_ALIGNMENT (mode);
+ }
+ }
align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
while (max_size > 1 && l > 0)
{
- scalar_int_mode mode = widest_int_mode_for_size (max_size);
+ mode = widest_fixed_size_mode_for_size (max_size,
+ op == SET_BY_PIECES);
enum insn_code icode;
unsigned int modesize = GET_MODE_SIZE (mode);
@@ -886,7 +965,7 @@ class pieces_addr
void *m_cfndata;
public:
pieces_addr (rtx, bool, by_pieces_constfn, void *);
- rtx adjust (scalar_int_mode, HOST_WIDE_INT);
+ rtx adjust (fixed_size_mode, HOST_WIDE_INT, by_pieces_prev * = nullptr);
void increment_address (HOST_WIDE_INT);
void maybe_predec (HOST_WIDE_INT);
void maybe_postinc (HOST_WIDE_INT);
@@ -988,10 +1067,12 @@ pieces_addr::decide_autoinc (machine_mode ARG_UNUSED (mode), bool reverse,
but we still modify the MEM's properties. */
rtx
-pieces_addr::adjust (scalar_int_mode mode, HOST_WIDE_INT offset)
+pieces_addr::adjust (fixed_size_mode mode, HOST_WIDE_INT offset,
+ by_pieces_prev *prev)
{
if (m_constfn)
- return m_constfn (m_cfndata, offset, mode);
+ /* Pass the previous data to m_constfn. */
+ return m_constfn (m_cfndata, prev, offset, mode);
if (m_obj == NULL_RTX)
return NULL_RTX;
if (m_auto)
@@ -1039,13 +1120,25 @@ pieces_addr::maybe_postinc (HOST_WIDE_INT size)
class op_by_pieces_d
{
+ private:
+ fixed_size_mode get_usable_mode (fixed_size_mode, unsigned int);
+ fixed_size_mode smallest_fixed_size_mode_for_size (unsigned int);
+
protected:
pieces_addr m_to, m_from;
- unsigned HOST_WIDE_INT m_len;
+ /* Make m_len read-only so that smallest_fixed_size_mode_for_size can
+ use it to check the valid mode size. */
+ const unsigned HOST_WIDE_INT m_len;
HOST_WIDE_INT m_offset;
unsigned int m_align;
unsigned int m_max_size;
bool m_reverse;
+ /* True if this is a stack push. */
+ bool m_push;
+ /* True if targetm.overlap_op_by_pieces_p () returns true. */
+ bool m_overlap_op_by_pieces;
+ /* True if QI vector mode can be used. */
+ bool m_qi_vector_mode;
/* Virtual functions, overridden by derived classes for the specific
operation. */
@@ -1056,8 +1149,9 @@ class op_by_pieces_d
}
public:
- op_by_pieces_d (rtx, bool, rtx, bool, by_pieces_constfn, void *,
- unsigned HOST_WIDE_INT, unsigned int);
+ op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn,
+ void *, unsigned HOST_WIDE_INT, unsigned int, bool,
+ bool = false);
void run ();
};
@@ -1065,17 +1159,21 @@ class op_by_pieces_d
objects named TO and FROM, which are identified as loads or stores
by TO_LOAD and FROM_LOAD. If FROM is a load, the optional FROM_CFN
and its associated FROM_CFN_DATA can be used to replace loads with
- constant values. LEN describes the length of the operation. */
+ constant values. MAX_PIECES describes the maximum number of bytes
+ at a time which can be moved efficiently. LEN describes the length
+ of the operation. */
-op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
- rtx from, bool from_load,
+op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to,
+ bool to_load, rtx from, bool from_load,
by_pieces_constfn from_cfn,
void *from_cfn_data,
unsigned HOST_WIDE_INT len,
- unsigned int align)
+ unsigned int align, bool push,
+ bool qi_vector_mode)
: m_to (to, to_load, NULL, NULL),
m_from (from, from_load, from_cfn, from_cfn_data),
- m_len (len), m_max_size (MOVE_MAX_PIECES + 1)
+ m_len (len), m_max_size (max_pieces + 1),
+ m_push (push), m_qi_vector_mode (qi_vector_mode)
{
int toi = m_to.get_addr_inc ();
int fromi = m_from.get_addr_inc ();
@@ -1096,7 +1194,9 @@ op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
if (by_pieces_ninsns (len, align, m_max_size, MOVE_BY_PIECES) > 2)
{
/* Find the mode of the largest comparison. */
- scalar_int_mode mode = widest_int_mode_for_size (m_max_size);
+ fixed_size_mode mode
+ = widest_fixed_size_mode_for_size (m_max_size,
+ m_qi_vector_mode);
m_from.decide_autoinc (mode, m_reverse, len);
m_to.decide_autoinc (mode, m_reverse, len);
@@ -1104,6 +1204,56 @@ op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
m_align = align;
+
+ m_overlap_op_by_pieces = targetm.overlap_op_by_pieces_p ();
+}
+
+/* This function returns the largest usable integer mode for LEN bytes
+ whose size is no bigger than size of MODE. */
+
+fixed_size_mode
+op_by_pieces_d::get_usable_mode (fixed_size_mode mode, unsigned int len)
+{
+ unsigned int size;
+ do
+ {
+ size = GET_MODE_SIZE (mode);
+ if (len >= size && prepare_mode (mode, m_align))
+ break;
+ /* widest_fixed_size_mode_for_size checks SIZE > 1. */
+ mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode);
+ }
+ while (1);
+ return mode;
+}
+
+/* Return the smallest integer or QI vector mode that is not narrower
+ than SIZE bytes. */
+
+fixed_size_mode
+op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size)
+{
+ /* Use QI vector only if SIZE is wider than a WORD. */
+ if (m_qi_vector_mode && size > UNITS_PER_WORD)
+ {
+ machine_mode mode;
+ fixed_size_mode candidate;
+ FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
+ if (is_a<fixed_size_mode> (mode, &candidate)
+ && GET_MODE_INNER (candidate) == QImode)
+ {
+ /* Don't return a mode wider than M_LEN. */
+ if (GET_MODE_SIZE (candidate) > m_len)
+ break;
+
+ if (GET_MODE_SIZE (candidate) >= size
+ && (optab_handler (vec_duplicate_optab, candidate)
+ != CODE_FOR_nothing))
+ return candidate;
+ }
+ }
+
+ return smallest_int_mode_for_size (size * BITS_PER_UNIT);
}
/* This function contains the main loop used for expanding a block
@@ -1114,50 +1264,98 @@ op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
void
op_by_pieces_d::run ()
{
- while (m_max_size > 1 && m_len > 0)
+ if (m_len == 0)
+ return;
+
+ unsigned HOST_WIDE_INT length = m_len;
+
+ /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1. */
+ fixed_size_mode mode
+ = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode);
+ mode = get_usable_mode (mode, length);
+
+ by_pieces_prev to_prev = { nullptr, mode };
+ by_pieces_prev from_prev = { nullptr, mode };
+
+ do
{
- scalar_int_mode mode = widest_int_mode_for_size (m_max_size);
+ unsigned int size = GET_MODE_SIZE (mode);
+ rtx to1 = NULL_RTX, from1;
- if (prepare_mode (mode, m_align))
+ while (length >= size)
{
- unsigned int size = GET_MODE_SIZE (mode);
- rtx to1 = NULL_RTX, from1;
+ if (m_reverse)
+ m_offset -= size;
- while (m_len >= size)
- {
- if (m_reverse)
- m_offset -= size;
+ to1 = m_to.adjust (mode, m_offset, &to_prev);
+ to_prev.data = to1;
+ to_prev.mode = mode;
+ from1 = m_from.adjust (mode, m_offset, &from_prev);
+ from_prev.data = from1;
+ from_prev.mode = mode;
- to1 = m_to.adjust (mode, m_offset);
- from1 = m_from.adjust (mode, m_offset);
+ m_to.maybe_predec (-(HOST_WIDE_INT)size);
+ m_from.maybe_predec (-(HOST_WIDE_INT)size);
- m_to.maybe_predec (-(HOST_WIDE_INT)size);
- m_from.maybe_predec (-(HOST_WIDE_INT)size);
+ generate (to1, from1, mode);
- generate (to1, from1, mode);
+ m_to.maybe_postinc (size);
+ m_from.maybe_postinc (size);
- m_to.maybe_postinc (size);
- m_from.maybe_postinc (size);
+ if (!m_reverse)
+ m_offset += size;
- if (!m_reverse)
- m_offset += size;
+ length -= size;
+ }
- m_len -= size;
- }
+ finish_mode (mode);
- finish_mode (mode);
- }
+ if (length == 0)
+ return;
- m_max_size = GET_MODE_SIZE (mode);
+ if (!m_push && m_overlap_op_by_pieces)
+ {
+ /* NB: Generate overlapping operations if it is not a stack
+ push since stack push must not overlap. Get the smallest
+ fixed size mode for M_LEN bytes. */
+ mode = smallest_fixed_size_mode_for_size (length);
+ mode = get_usable_mode (mode, GET_MODE_SIZE (mode));
+ int gap = GET_MODE_SIZE (mode) - length;
+ if (gap > 0)
+ {
+ /* If size of MODE > M_LEN, generate the last operation
+ in MODE for the remaining bytes with overlapping memory
+ from the previous operation. */
+ if (m_reverse)
+ m_offset += gap;
+ else
+ m_offset -= gap;
+ length += gap;
+ }
+ }
+ else
+ {
+ /* widest_fixed_size_mode_for_size checks SIZE > 1. */
+ mode = widest_fixed_size_mode_for_size (size,
+ m_qi_vector_mode);
+ mode = get_usable_mode (mode, length);
+ }
}
+ while (1);
/* The code above should have handled everything. */
- gcc_assert (!m_len);
+ gcc_assert (!length);
}
/* Derived class from op_by_pieces_d, providing support for block move
operations. */
+#ifdef PUSH_ROUNDING
+#define PUSHG_P(to) ((to) == nullptr)
+#else
+#define PUSHG_P(to) false
+#endif
+
class move_by_pieces_d : public op_by_pieces_d
{
insn_gen_fn m_gen_fun;
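
The reworked op_by_pieces_d::run above lets the final piece overlap the previous one when targetm.overlap_op_by_pieces_p () is true and the operation is not a stack push; by_pieces_ninsns correspondingly rounds the length up to the widest piece when counting insns. The following standalone C sketch is only an illustration of that idea (the function name and the 13-byte/8-byte sizes are made up, not GCC-internal code): a 13-byte copy becomes two 8-byte moves, the second shifted back to overlap the first, instead of an 8-, a 4- and a 1-byte move.

#include <stdint.h>
#include <string.h>

/* Illustration only: copy 13 bytes as two 8-byte moves, the second one
   overlapping bytes 5..7 of the first, mirroring the overlapping tail
   operation that op_by_pieces_d::run can now emit.  */
static void
copy13_overlapping (void *dst, const void *src)
{
  uint64_t a, b;
  memcpy (&a, src, 8);                       /* bytes 0..7  */
  memcpy (&b, (const char *) src + 5, 8);    /* bytes 5..12 */
  memcpy (dst, &a, 8);
  memcpy ((char *) dst + 5, &b, 8);
}

Without overlap (for example for a stack push) the tail is still handled by the narrowing else branch above.
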
@@ -1167,7 +1365,8 @@ class move_by_pieces_d : public op_by_pieces_d
public:
move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len,
unsigned int align)
- : op_by_pieces_d (to, false, from, true, NULL, NULL, len, align)
+ : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL,
+ NULL, len, align, PUSHG_P (to))
{
}
rtx finish_retmode (memop_ret);
@@ -1261,8 +1460,10 @@ class store_by_pieces_d : public op_by_pieces_d
public:
store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data,
- unsigned HOST_WIDE_INT len, unsigned int align)
- : op_by_pieces_d (to, false, NULL_RTX, true, cfn, cfn_data, len, align)
+ unsigned HOST_WIDE_INT len, unsigned int align,
+ bool qi_vector_mode)
+ : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn,
+ cfn_data, len, align, false, qi_vector_mode)
{
}
rtx finish_retmode (memop_ret);
@@ -1317,7 +1518,7 @@ store_by_pieces_d::finish_retmode (memop_ret retmode)
int
can_store_by_pieces (unsigned HOST_WIDE_INT len,
- rtx (*constfun) (void *, HOST_WIDE_INT, scalar_int_mode),
+ by_pieces_constfn constfun,
void *constfundata, unsigned int align, bool memsetp)
{
unsigned HOST_WIDE_INT l;
@@ -1351,7 +1552,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
max_size = STORE_MAX_PIECES + 1;
while (max_size > 1 && l > 0)
{
- scalar_int_mode mode = widest_int_mode_for_size (max_size);
+ fixed_size_mode mode
+ = widest_fixed_size_mode_for_size (max_size, memsetp);
icode = optab_handler (mov_optab, mode);
if (icode != CODE_FOR_nothing
@@ -1364,8 +1566,12 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
if (reverse)
offset -= size;
- cst = (*constfun) (constfundata, offset, mode);
- if (!targetm.legitimate_constant_p (mode, cst))
+ cst = (*constfun) (constfundata, nullptr, offset, mode);
+ /* All CONST_VECTORs can be loaded for memset since
+ vec_duplicate_optab is a precondition to pick a
+ vector mode for the memset expander. */
+ if (!((memsetp && VECTOR_MODE_P (mode))
+ || targetm.legitimate_constant_p (mode, cst)))
return 0;
if (!reverse)
@@ -1394,7 +1600,7 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
rtx
store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
- rtx (*constfun) (void *, HOST_WIDE_INT, scalar_int_mode),
+ by_pieces_constfn constfun,
void *constfundata, unsigned int align, bool memsetp,
memop_ret retmode)
{
@@ -1409,7 +1615,8 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
memsetp ? SET_BY_PIECES : STORE_BY_PIECES,
optimize_insn_for_speed_p ()));
- store_by_pieces_d data (to, constfun, constfundata, len, align);
+ store_by_pieces_d data (to, constfun, constfundata, len, align,
+ memsetp);
data.run ();
if (retmode != RETURN_BEGIN)
@@ -1418,15 +1625,6 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
return to;
}
-/* Callback routine for clear_by_pieces.
- Return const0_rtx unconditionally. */
-
-static rtx
-clear_by_pieces_1 (void *, HOST_WIDE_INT, scalar_int_mode)
-{
- return const0_rtx;
-}
-
/* Generate several move instructions to clear LEN bytes of block TO. (A MEM
rtx with BLKmode). ALIGN is maximum alignment we can assume. */
@@ -1436,7 +1634,10 @@ clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align)
if (len == 0)
return;
- store_by_pieces_d data (to, clear_by_pieces_1, NULL, len, align);
+ /* Use builtin_memset_read_str to support vector mode broadcast. */
+ char c = 0;
+ store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align,
+ true);
data.run ();
}
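
clear_by_pieces now goes through builtin_memset_read_str with a zero byte so a QI vector mode can broadcast the fill value when vec_duplicate is available. As a rough source-level analogy (using GCC's generic vector extension; the helper below is hypothetical and not part of the patch), one 16-byte broadcast store replaces several narrower integer stores:

typedef unsigned char v16qi __attribute__ ((vector_size (16)));

/* Illustration only: fill N bytes (assumed to be a multiple of 16) with C
   by storing a broadcast 16-byte vector, the kind of store the by-pieces
   memset path can now select instead of scalar integer stores.  */
static void
fill_by_vector (unsigned char *p, unsigned char c, unsigned int n)
{
  v16qi v = { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c };
  for (unsigned int i = 0; i < n; i += 16)
    __builtin_memcpy (p + i, &v, 16);
}
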
@@ -1458,7 +1659,8 @@ class compare_by_pieces_d : public op_by_pieces_d
compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn,
void *op1_cfn_data, HOST_WIDE_INT len, int align,
rtx_code_label *fail_label)
- : op_by_pieces_d (op0, true, op1, true, op1_cfn, op1_cfn_data, len, align)
+ : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, op1_cfn,
+ op1_cfn_data, len, align, false)
{
m_fail_label = fail_label;
}
@@ -1727,7 +1929,7 @@ block_move_libcall_safe_for_call_parm (void)
tree fn;
/* If arguments are pushed on the stack, then they're safe. */
- if (PUSH_ARGS)
+ if (targetm.calls.push_argument (0))
return true;
/* If registers go on the stack anyway, any argument is sure to clobber
@@ -3054,7 +3256,8 @@ clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
unsigned int expected_align, HOST_WIDE_INT expected_size,
unsigned HOST_WIDE_INT min_size,
unsigned HOST_WIDE_INT max_size,
- unsigned HOST_WIDE_INT probable_max_size)
+ unsigned HOST_WIDE_INT probable_max_size,
+ unsigned ctz_size)
{
machine_mode mode = GET_MODE (object);
unsigned int align;
@@ -3101,6 +3304,10 @@ clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
expected_align, expected_size,
min_size, max_size, probable_max_size))
;
+ else if (try_store_by_multiple_pieces (object, size, ctz_size,
+ min_size, max_size,
+ NULL_RTX, 0, align))
+ ;
else if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (object)))
return set_storage_via_libcall (object, size, const0_rtx,
method == BLOCK_OP_TAILCALL);
@@ -3118,7 +3325,7 @@ clear_storage (rtx object, rtx size, enum block_op_methods method)
min = max = UINTVAL (size);
else
max = GET_MODE_MASK (GET_MODE (size));
- return clear_storage_hints (object, size, method, 0, -1, min, max, max);
+ return clear_storage_hints (object, size, method, 0, -1, min, max, max, 0);
}
@@ -4538,11 +4745,19 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
skip = (reg_parm_stack_space == 0) ? 0 : used;
#ifdef PUSH_ROUNDING
+ /* NB: Let the backend know the number of bytes to push and
+ decide if push insns should be generated. */
+ unsigned int push_size;
+ if (CONST_INT_P (size))
+ push_size = INTVAL (size);
+ else
+ push_size = 0;
+
/* Do it with several push insns if that doesn't take lots of insns
and if there is no difficulty with push insns that skip bytes
on the stack for alignment purposes. */
if (args_addr == 0
- && PUSH_ARGS
+ && targetm.calls.push_argument (push_size)
&& CONST_INT_P (size)
&& skip == 0
&& MEM_ALIGN (xinner) >= align
@@ -4747,7 +4962,7 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
anti_adjust_stack (gen_int_mode (extra, Pmode));
#ifdef PUSH_ROUNDING
- if (args_addr == 0 && PUSH_ARGS)
+ if (args_addr == 0 && targetm.calls.push_argument (0))
emit_single_push_insn (mode, x, type);
else
#endif
@@ -5457,6 +5672,7 @@ expand_assignment (tree to, tree from, bool nontemporal)
/* If to_rtx is a promoted subreg, we need to zero or sign
extend the value afterwards. */
if (TREE_CODE (to) == MEM_REF
+ && TYPE_MODE (TREE_TYPE (from)) != BLKmode
&& !REF_REVERSE_STORAGE_ORDER (to)
&& known_eq (bitpos, 0)
&& known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (to_rtx))))
@@ -5643,7 +5859,8 @@ emit_storent_insn (rtx to, rtx from)
/* Helper function for store_expr storing of STRING_CST. */
static rtx
-string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode mode)
+string_cst_read_str (void *data, void *, HOST_WIDE_INT offset,
+ fixed_size_mode mode)
{
tree str = (tree) data;
@@ -5658,10 +5875,13 @@ string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode mode)
size_t l = TREE_STRING_LENGTH (str) - offset;
memcpy (p, TREE_STRING_POINTER (str) + offset, l);
memset (p + l, '\0', GET_MODE_SIZE (mode) - l);
- return c_readstr (p, mode, false);
+ return c_readstr (p, as_a <scalar_int_mode> (mode), false);
}
- return c_readstr (TREE_STRING_POINTER (str) + offset, mode, false);
+ /* The by-pieces infrastructure does not try to pick a vector mode
+ for storing STRING_CST. */
+ return c_readstr (TREE_STRING_POINTER (str) + offset,
+ as_a <scalar_int_mode> (mode), false);
}
/* Generate code for computing expression EXP,
@@ -6967,7 +7187,8 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
&& eltmode == GET_MODE_INNER (mode)
&& ((icode = optab_handler (vec_duplicate_optab, mode))
!= CODE_FOR_nothing)
- && (elt = uniform_vector_p (exp)))
+ && (elt = uniform_vector_p (exp))
+ && !VECTOR_TYPE_P (TREE_TYPE (elt)))
{
class expand_operand ops[2];
create_output_operand (&ops[0], target, mode);
@@ -7016,7 +7237,9 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
/* Compute the size of the elements in the CTOR. It differs
from the size of the vector type elements only when the
CTOR elements are vectors themselves. */
- tree val_type = TREE_TYPE (CONSTRUCTOR_ELT (exp, 0)->value);
+ tree val_type = (CONSTRUCTOR_NELTS (exp) != 0
+ ? TREE_TYPE (CONSTRUCTOR_ELT (exp, 0)->value)
+ : elttype);
if (VECTOR_TYPE_P (val_type))
bitsize = tree_to_uhwi (TYPE_SIZE (val_type));
else
@@ -7211,7 +7434,13 @@ store_field (rtx target, poly_int64 bitsize, poly_int64 bitpos,
|| !multiple_p (bitpos, BITS_PER_UNIT)
|| !poly_int_tree_p (DECL_SIZE (TREE_OPERAND (exp, 1)),
&decl_bitsize)
- || maybe_ne (decl_bitsize, bitsize)))
+ || maybe_ne (decl_bitsize, bitsize))
+ /* A call with an addressable return type and return-slot
+ optimization must not need bitfield operations but we must
+ pass down the original target. */
+ && (TREE_CODE (exp) != CALL_EXPR
+ || !TREE_ADDRESSABLE (TREE_TYPE (exp))
+ || !CALL_EXPR_RETURN_SLOT_OPT (exp)))
/* If we are expanding a MEM_REF of a non-BLKmode non-addressable
decl we must use bitfield operations. */
|| (known_size_p (bitsize)
@@ -8195,7 +8424,7 @@ expand_expr_addr_expr_1 (tree exp, rtx target, scalar_int_mode tmode,
array with address of COMPOUND_LITERAL_EXPR in DECL_INITIAL;
the initializers aren't gimplified. */
if (COMPOUND_LITERAL_EXPR_DECL (exp)
- && TREE_STATIC (COMPOUND_LITERAL_EXPR_DECL (exp)))
+ && is_global_var (COMPOUND_LITERAL_EXPR_DECL (exp)))
return expand_expr_addr_expr_1 (COMPOUND_LITERAL_EXPR_DECL (exp),
target, tmode, modifier, as);
/* FALLTHRU */
@@ -8412,6 +8641,19 @@ expand_constructor (tree exp, rtx target, enum expand_modifier modifier,
return constructor;
}
+ /* If the CTOR is available in static storage and not mostly
+ zeros and we can move it by pieces prefer to do so since
+ that's usually more efficient than performing a series of
+ stores from immediates. */
+ if (avoid_temp_mem
+ && TREE_STATIC (exp)
+ && TREE_CONSTANT (exp)
+ && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type))
+ && can_move_by_pieces (tree_to_uhwi (TYPE_SIZE_UNIT (type)),
+ TYPE_ALIGN (type))
+ && ! mostly_zeros_p (exp))
+ return NULL_RTX;
+
/* Handle calls that pass values in multiple non-contiguous
locations. The Irix 6 ABI has examples of this. */
if (target == 0 || ! safe_from_p (target, exp, 1)
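
The early return added in this hunk makes expand_constructor keep the constructor in static storage when it is constant, not mostly zeros, and small enough for can_move_by_pieces, so the initialization becomes a block copy rather than a series of immediate stores. Roughly, at the source level (the struct and helpers below are hypothetical, for illustration only), it is the difference between these two strategies:

struct point3 { int x, y, z, tag; };

/* Preferred when the initializer is constant, not mostly zeros and small:
   one block copy from a read-only template in static storage.  */
static void
init_from_template (struct point3 *p)
{
  static const struct point3 templ = { 1, 2, 3, 42 };
  __builtin_memcpy (p, &templ, sizeof *p);
}

/* The alternative the early return avoids: a series of stores from
   immediates.  */
static void
init_by_stores (struct point3 *p)
{
  p->x = 1; p->y = 2; p->z = 3; p->tag = 42;
}
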
@@ -8653,6 +8895,56 @@ expand_misaligned_mem_ref (rtx temp, machine_mode mode, int unsignedp,
return temp;
}
+/* Helper function of expand_expr_2, expand a division or modulo.
+ op0 and op1 should be already expanded treeop0 and treeop1, using
+ expand_operands. */
+
+static rtx
+expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0,
+ tree treeop1, rtx op0, rtx op1, rtx target, int unsignedp)
+{
+ bool mod_p = (code == TRUNC_MOD_EXPR || code == FLOOR_MOD_EXPR
+ || code == CEIL_MOD_EXPR || code == ROUND_MOD_EXPR);
+ if (SCALAR_INT_MODE_P (mode)
+ && optimize >= 2
+ && get_range_pos_neg (treeop0) == 1
+ && get_range_pos_neg (treeop1) == 1)
+ {
+ /* If both arguments are known to be positive when interpreted
+ as signed, we can expand it as both signed and unsigned
+ division or modulo. Choose the cheaper sequence in that case. */
+ bool speed_p = optimize_insn_for_speed_p ();
+ do_pending_stack_adjust ();
+ start_sequence ();
+ rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1);
+ rtx_insn *uns_insns = get_insns ();
+ end_sequence ();
+ start_sequence ();
+ rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0);
+ rtx_insn *sgn_insns = get_insns ();
+ end_sequence ();
+ unsigned uns_cost = seq_cost (uns_insns, speed_p);
+ unsigned sgn_cost = seq_cost (sgn_insns, speed_p);
+
+ /* If costs are the same then use as tie breaker the other
+ factor. */
+ if (uns_cost == sgn_cost)
+ {
+ uns_cost = seq_cost (uns_insns, !speed_p);
+ sgn_cost = seq_cost (sgn_insns, !speed_p);
+ }
+
+ if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
+ {
+ emit_insn (uns_insns);
+ return uns_ret;
+ }
+ emit_insn (sgn_insns);
+ return sgn_ret;
+ }
+ return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
+}
+
rtx
expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
enum expand_modifier modifier)
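
expand_expr_divmod, added above and used by the division and modulo cases later in this file, exploits the fact that when both operands are known non-negative in their signed interpretation, signed and unsigned division (or modulo) agree, so the cheaper of the two expansions can be kept after comparing sequence costs. A minimal check of that equivalence (illustrative only, not part of the patch):

#include <assert.h>

/* For x >= 0 and y > 0, signed and unsigned truncating division and
   modulo give the same results, so either expansion is usable.  */
static void
check_div_equivalence (int x, int y)
{
  assert (x >= 0 && y > 0);
  assert (x / y == (int) ((unsigned) x / (unsigned) y));
  assert (x % y == (int) ((unsigned) x % (unsigned) y));
}
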
@@ -9190,14 +9482,78 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
if (!REG_P (op0))
op0 = copy_to_mode_reg (mode, op0);
- return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
- gen_int_mode (tree_to_shwi (exp1),
- TYPE_MODE (TREE_TYPE (exp1)))));
+ op1 = gen_int_mode (tree_to_shwi (exp1),
+ TYPE_MODE (TREE_TYPE (exp1)));
+ return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0, op1));
}
if (modifier == EXPAND_STACK_PARM)
target = 0;
+ if (SCALAR_INT_MODE_P (mode) && optimize >= 2)
+ {
+ gimple *def_stmt0 = get_def_for_expr (treeop0, TRUNC_DIV_EXPR);
+ gimple *def_stmt1 = get_def_for_expr (treeop1, TRUNC_DIV_EXPR);
+ if (def_stmt0
+ && !operand_equal_p (treeop1, gimple_assign_rhs2 (def_stmt0), 0))
+ def_stmt0 = NULL;
+ if (def_stmt1
+ && !operand_equal_p (treeop0, gimple_assign_rhs2 (def_stmt1), 0))
+ def_stmt1 = NULL;
+
+ if (def_stmt0 || def_stmt1)
+ {
+ /* X / Y * Y can be expanded as X - X % Y too.
+ Choose the cheaper sequence of those two. */
+ if (def_stmt0)
+ treeop0 = gimple_assign_rhs1 (def_stmt0);
+ else
+ {
+ treeop1 = treeop0;
+ treeop0 = gimple_assign_rhs1 (def_stmt1);
+ }
+ expand_operands (treeop0, treeop1, subtarget, &op0, &op1,
+ EXPAND_NORMAL);
+ bool speed_p = optimize_insn_for_speed_p ();
+ do_pending_stack_adjust ();
+ start_sequence ();
+ rtx divmul_ret
+ = expand_expr_divmod (TRUNC_DIV_EXPR, mode, treeop0, treeop1,
+ op0, op1, NULL_RTX, unsignedp);
+ divmul_ret = expand_mult (mode, divmul_ret, op1, target,
+ unsignedp);
+ rtx_insn *divmul_insns = get_insns ();
+ end_sequence ();
+ start_sequence ();
+ rtx modsub_ret
+ = expand_expr_divmod (TRUNC_MOD_EXPR, mode, treeop0, treeop1,
+ op0, op1, NULL_RTX, unsignedp);
+ this_optab = optab_for_tree_code (MINUS_EXPR, type,
+ optab_default);
+ modsub_ret = expand_binop (mode, this_optab, op0, modsub_ret,
+ target, unsignedp, OPTAB_LIB_WIDEN);
+ rtx_insn *modsub_insns = get_insns ();
+ end_sequence ();
+ unsigned divmul_cost = seq_cost (divmul_insns, speed_p);
+ unsigned modsub_cost = seq_cost (modsub_insns, speed_p);
+ /* If costs are the same then use as tie breaker the other
+ factor. */
+ if (divmul_cost == modsub_cost)
+ {
+ divmul_cost = seq_cost (divmul_insns, !speed_p);
+ modsub_cost = seq_cost (modsub_insns, !speed_p);
+ }
+
+ if (divmul_cost <= modsub_cost)
+ {
+ emit_insn (divmul_insns);
+ return REDUCE_BIT_FIELD (divmul_ret);
+ }
+ emit_insn (modsub_insns);
+ return REDUCE_BIT_FIELD (modsub_ret);
+ }
+ }
+
expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
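
The new code in this hunk expands (x / y) * y either directly or as x - x % y and keeps the cheaper sequence. The rewrite rests on the C truncating-division identity x == (x / y) * y + x % y; a small illustrative check (assuming x / y itself does not overflow):

#include <assert.h>

/* C guarantees x == (x / y) * y + x % y for y != 0, so (x / y) * y can
   also be computed as x - x % y (assuming x / y does not overflow,
   i.e. not INT_MIN / -1).  */
static void
check_divmul_identity (int x, int y)
{
  assert (y != 0);
  assert ((x / y) * y == x - x % y);
}
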
@@ -9211,61 +9567,21 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
case CEIL_DIV_EXPR:
case ROUND_DIV_EXPR:
case EXACT_DIV_EXPR:
- {
- /* If this is a fixed-point operation, then we cannot use the code
- below because "expand_divmod" doesn't support sat/no-sat fixed-point
- divisions. */
- if (ALL_FIXED_POINT_MODE_P (mode))
- goto binop;
-
- if (modifier == EXPAND_STACK_PARM)
- target = 0;
- /* Possible optimization: compute the dividend with EXPAND_SUM
- then if the divisor is constant can optimize the case
- where some terms of the dividend have coeffs divisible by it. */
- expand_operands (treeop0, treeop1,
- subtarget, &op0, &op1, EXPAND_NORMAL);
- bool mod_p = code == TRUNC_MOD_EXPR || code == FLOOR_MOD_EXPR
- || code == CEIL_MOD_EXPR || code == ROUND_MOD_EXPR;
- if (SCALAR_INT_MODE_P (mode)
- && optimize >= 2
- && get_range_pos_neg (treeop0) == 1
- && get_range_pos_neg (treeop1) == 1)
- {
- /* If both arguments are known to be positive when interpreted
- as signed, we can expand it as both signed and unsigned
- division or modulo. Choose the cheaper sequence in that case. */
- bool speed_p = optimize_insn_for_speed_p ();
- do_pending_stack_adjust ();
- start_sequence ();
- rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1);
- rtx_insn *uns_insns = get_insns ();
- end_sequence ();
- start_sequence ();
- rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0);
- rtx_insn *sgn_insns = get_insns ();
- end_sequence ();
- unsigned uns_cost = seq_cost (uns_insns, speed_p);
- unsigned sgn_cost = seq_cost (sgn_insns, speed_p);
-
- /* If costs are the same then use as tie breaker the other
- other factor. */
- if (uns_cost == sgn_cost)
- {
- uns_cost = seq_cost (uns_insns, !speed_p);
- sgn_cost = seq_cost (sgn_insns, !speed_p);
- }
-
- if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
- {
- emit_insn (uns_insns);
- return uns_ret;
- }
- emit_insn (sgn_insns);
- return sgn_ret;
- }
- return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
- }
+ /* If this is a fixed-point operation, then we cannot use the code
+ below because "expand_divmod" doesn't support sat/no-sat fixed-point
+ divisions. */
+ if (ALL_FIXED_POINT_MODE_P (mode))
+ goto binop;
+
+ if (modifier == EXPAND_STACK_PARM)
+ target = 0;
+ /* Possible optimization: compute the dividend with EXPAND_SUM
+ then if the divisor is constant can optimize the case
+ where some terms of the dividend have coeffs divisible by it. */
+ expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
+ return expand_expr_divmod (code, mode, treeop0, treeop1, op0, op1,
+ target, unsignedp);
+
case RDIV_EXPR:
goto binop;
@@ -9800,6 +10116,7 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
case VEC_PACK_SAT_EXPR:
case VEC_PACK_FIX_TRUNC_EXPR:
mode = TYPE_MODE (TREE_TYPE (treeop0));
+ subtarget = NULL_RTX;
goto binop;
case VEC_PACK_TRUNC_EXPR:
@@ -9823,6 +10140,7 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
return eops[0].value;
}
mode = TYPE_MODE (TREE_TYPE (treeop0));
+ subtarget = NULL_RTX;
goto binop;
case VEC_PACK_FLOAT_EXPR:
@@ -10221,8 +10539,19 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
exp = SSA_NAME_VAR (ssa_name);
goto expand_decl_rtl;
- case PARM_DECL:
case VAR_DECL:
+ /* Allow accel compiler to handle variables that require special
+ treatment, e.g. if they have been modified in some way earlier in
+ compilation by the adjust_private_decl OpenACC hook. */
+ if (flag_openacc && targetm.goacc.expand_var_decl)
+ {
+ temp = targetm.goacc.expand_var_decl (exp);
+ if (temp)
+ return temp;
+ }
+ /* ... fall through ... */
+
+ case PARM_DECL:
/* If a static var's type was incomplete when the decl was written,
but the type is complete now, lay out the decl now. */
if (DECL_SIZE (exp) == 0
@@ -10764,61 +11093,8 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
goto normal_inner_ref;
case COMPONENT_REF:
- /* If the operand is a CONSTRUCTOR, we can just extract the
- appropriate field if it is present. */
- if (TREE_CODE (treeop0) == CONSTRUCTOR)
- {
- unsigned HOST_WIDE_INT idx;
- tree field, value;
- scalar_int_mode field_mode;
-
- FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (treeop0),
- idx, field, value)
- if (field == treeop1
- /* We can normally use the value of the field in the
- CONSTRUCTOR. However, if this is a bitfield in
- an integral mode that we can fit in a HOST_WIDE_INT,
- we must mask only the number of bits in the bitfield,
- since this is done implicitly by the constructor. If
- the bitfield does not meet either of those conditions,
- we can't do this optimization. */
- && (! DECL_BIT_FIELD (field)
- || (is_int_mode (DECL_MODE (field), &field_mode)
- && (GET_MODE_PRECISION (field_mode)
- <= HOST_BITS_PER_WIDE_INT))))
- {
- if (DECL_BIT_FIELD (field)
- && modifier == EXPAND_STACK_PARM)
- target = 0;
- op0 = expand_expr (value, target, tmode, modifier);
- if (DECL_BIT_FIELD (field))
- {
- HOST_WIDE_INT bitsize = TREE_INT_CST_LOW (DECL_SIZE (field));
- scalar_int_mode imode
- = SCALAR_INT_TYPE_MODE (TREE_TYPE (field));
-
- if (TYPE_UNSIGNED (TREE_TYPE (field)))
- {
- op1 = gen_int_mode ((HOST_WIDE_INT_1 << bitsize) - 1,
- imode);
- op0 = expand_and (imode, op0, op1, target);
- }
- else
- {
- int count = GET_MODE_PRECISION (imode) - bitsize;
-
- op0 = expand_shift (LSHIFT_EXPR, imode, op0, count,
- target, 0);
- op0 = expand_shift (RSHIFT_EXPR, imode, op0, count,
- target, 0);
- }
- }
-
- return op0;
- }
- }
- goto normal_inner_ref;
-
+ gcc_assert (TREE_CODE (treeop0) != CONSTRUCTOR);
+ /* Fall through. */
case BIT_FIELD_REF:
case ARRAY_RANGE_REF:
normal_inner_ref:
@@ -11235,7 +11511,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
/* All valid uses of __builtin_va_arg_pack () are removed during
inlining. */
if (CALL_EXPR_VA_ARG_PACK (exp))
- error ("%Kinvalid use of %<__builtin_va_arg_pack ()%>", exp);
+ error ("invalid use of %<__builtin_va_arg_pack ()%>");
{
tree fndecl = get_callee_fndecl (exp), attr;
@@ -11247,7 +11523,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
DECL_ATTRIBUTES (fndecl))) != NULL)
{
const char *ident = lang_hooks.decl_printable_name (fndecl, 1);
- error ("%Kcall to %qs declared with attribute error: %s", exp,
+ error ("call to %qs declared with attribute error: %s",
identifier_to_locale (ident),
TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))));
}
@@ -11259,10 +11535,10 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
DECL_ATTRIBUTES (fndecl))) != NULL)
{
const char *ident = lang_hooks.decl_printable_name (fndecl, 1);
- warning_at (tree_nonartificial_location (exp),
+ warning_at (EXPR_LOCATION (exp),
OPT_Wattribute_warning,
- "%Kcall to %qs declared with attribute warning: %s",
- exp, identifier_to_locale (ident),
+ "call to %qs declared with attribute warning: %s",
+ identifier_to_locale (ident),
TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))));
}
@@ -12275,6 +12551,37 @@ maybe_optimize_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
*arg1 = c4;
return code == EQ_EXPR ? LE_EXPR : GT_EXPR;
}
+
+/* Optimize x - y < 0 into x < 0 if x - y has undefined overflow. */
+
+void
+maybe_optimize_sub_cmp_0 (enum tree_code code, tree *arg0, tree *arg1)
+{
+ gcc_checking_assert (code == GT_EXPR || code == GE_EXPR
+ || code == LT_EXPR || code == LE_EXPR);
+ gcc_checking_assert (integer_zerop (*arg1));
+
+ if (!optimize)
+ return;
+
+ gimple *stmt = get_def_for_expr (*arg0, MINUS_EXPR);
+ if (stmt == NULL)
+ return;
+
+ tree treeop0 = gimple_assign_rhs1 (stmt);
+ tree treeop1 = gimple_assign_rhs2 (stmt);
+ if (!TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (treeop0)))
+ return;
+
+ if (issue_strict_overflow_warning (WARN_STRICT_OVERFLOW_COMPARISON))
+ warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
+ "assuming signed overflow does not occur when "
+ "simplifying %<X - Y %s 0%> to %<X %s Y%>",
+ op_symbol_code (code), op_symbol_code (code));
+
+ *arg0 = treeop0;
+ *arg1 = treeop1;
+}
/* Generate code to calculate OPS, and exploded expression
using a store-flag instruction and return an rtx for the result.
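
maybe_optimize_sub_cmp_0, added above and called from do_store_flag in the next hunk, rewrites x - y < 0 as x < y only when the subtraction has undefined overflow, because the two forms differ once wrapping is possible. An illustrative pair of functions (hypothetical, not from the patch) showing why wrapping must be excluded:

/* With signed overflow undefined, the compiler may assume x - y did not
   wrap, so "x - y < 0" can be folded to "x < y".  Under wrapping
   arithmetic the two differ: for x = INT_MAX, y = INT_MIN the subtraction
   wraps to -1, so less_via_sub returns 1 while less_direct returns 0.  */
static int
less_via_sub (int x, int y)
{
  return x - y < 0;
}

static int
less_direct (int x, int y)
{
  return x < y;
}
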
@@ -12363,6 +12670,14 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
}
}
+ /* Optimize (x - y) < 0 into x < y if x - y has undefined overflow. */
+ if (!unsignedp
+ && (ops->code == LT_EXPR || ops->code == LE_EXPR
+ || ops->code == GT_EXPR || ops->code == GE_EXPR)
+ && integer_zerop (arg1)
+ && TREE_CODE (arg0) == SSA_NAME)
+ maybe_optimize_sub_cmp_0 (ops->code, &arg0, &arg1);
+
/* Get the rtx comparison code to use. We know that EXP is a comparison
operation of some type. Some comparisons against 1 and -1 can be
converted to comparisons with zero. Do so here so that the tests