From eafeba3e5187a53a4c08a3285b4b220e1ab68b60 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 16 Apr 2020 09:37:44 +0200 Subject: middle-end/94614 - avoid multiword moves to nothing This adjusts emit_move_multi_word to handle moves into paradoxical subregs parts that are not there and adjusts lower-subregs CLOBBER resolving to deal with those as well. 2020-04-16 Richard Biener PR middle-end/94614 * expr.c (emit_move_multi_word): Do not generate code when the destination part is undefined_operand_subword_p. * lower-subreg.c (resolve_clobber): Look through a paradoxical subreg. --- gcc/expr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'gcc/expr.c') diff --git a/gcc/expr.c b/gcc/expr.c index b97c217..dfbeae7 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -3692,6 +3692,11 @@ emit_move_multi_word (machine_mode mode, rtx x, rtx y) need_clobber = false; for (i = 0; i < CEIL (mode_size, UNITS_PER_WORD); i++) { + /* Do not generate code for a move if it would go entirely + to the non-existing bits of a paradoxical subreg. */ + if (undefined_operand_subword_p (x, i)) + continue; + rtx xpart = operand_subword (x, i, 1, mode); rtx ypart; -- cgit v1.1 From 43a4fc095e30188392cc42299c4081297e321104 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 29 May 2020 10:42:50 +0200 Subject: expander: Optimize store_expr from STRING_CST [PR95052] In the following testcase, store_expr of e.g. 97 bytes long string literal into 1MB long array is implemented by copying the 97 bytes from .rodata section, followed by clearing the remaining bytes. But, as the STRING_CST has type char[1024*1024], we actually allocate whole 1MB in .rodata section for it, even when we only use the first 97 bytes from that. The following patch tweaks it so that if we are going to initialize only the small part from it, we don't emit all the zeros that we never use after it.
2020-05-29 Jakub Jelinek PR middle-end/95052 * expr.c (store_expr): If expr_size is constant and significantly larger than TREE_STRING_LENGTH, set temp to just the TREE_STRING_LENGTH portion of the STRING_CST. * gcc.target/i386/pr95052.c: New test. --- gcc/expr.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'gcc/expr.c') diff --git a/gcc/expr.c b/gcc/expr.c index dfbeae7..049d3ed 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -5583,6 +5583,7 @@ store_expr (tree exp, rtx target, int call_param_p, rtx temp; rtx alt_rtl = NULL_RTX; location_t loc = curr_insn_location (); + bool shortened_string_cst = false; if (VOID_TYPE_P (TREE_TYPE (exp))) { @@ -5749,7 +5750,32 @@ store_expr (tree exp, rtx target, int call_param_p, /* If we want to use a nontemporal or a reverse order store, force the value into a register first. */ tmp_target = nontemporal || reverse ? NULL_RTX : target; - temp = expand_expr_real (exp, tmp_target, GET_MODE (target), + tree rexp = exp; + if (TREE_CODE (exp) == STRING_CST + && tmp_target == target + && GET_MODE (target) == BLKmode + && TYPE_MODE (TREE_TYPE (exp)) == BLKmode) + { + rtx size = expr_size (exp); + if (CONST_INT_P (size) + && size != const0_rtx + && (UINTVAL (size) + > ((unsigned HOST_WIDE_INT) TREE_STRING_LENGTH (exp) + 32))) + { + /* If the STRING_CST has much larger array type than + TREE_STRING_LENGTH, only emit the TREE_STRING_LENGTH part of + it into the rodata section as the code later on will use + memset zero for the remainder anyway. See PR95052. */ + tmp_target = NULL_RTX; + rexp = copy_node (exp); + tree index + = build_index_type (size_int (TREE_STRING_LENGTH (exp) - 1)); + TREE_TYPE (rexp) = build_array_type (TREE_TYPE (TREE_TYPE (exp)), + index); + shortened_string_cst = true; + } + } + temp = expand_expr_real (rexp, tmp_target, GET_MODE (target), (call_param_p ? 
EXPAND_STACK_PARM : EXPAND_NORMAL), &alt_rtl, false); @@ -5763,6 +5789,7 @@ store_expr (tree exp, rtx target, int call_param_p, && TREE_CODE (exp) != ERROR_MARK && GET_MODE (target) != TYPE_MODE (TREE_TYPE (exp))) { + gcc_assert (!shortened_string_cst); if (GET_MODE_CLASS (GET_MODE (target)) != GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (exp))) && known_eq (GET_MODE_BITSIZE (GET_MODE (target)), @@ -5815,6 +5842,7 @@ store_expr (tree exp, rtx target, int call_param_p, { if (GET_MODE (temp) != GET_MODE (target) && GET_MODE (temp) != VOIDmode) { + gcc_assert (!shortened_string_cst); if (GET_MODE (target) == BLKmode) { /* Handle calls that return BLKmode values in registers. */ @@ -5900,6 +5928,8 @@ store_expr (tree exp, rtx target, int call_param_p, emit_label (label); } } + else if (shortened_string_cst) + gcc_unreachable (); /* Handle calls that return values in multiple non-contiguous locations. The Irix 6 ABI has examples of this. */ else if (GET_CODE (target) == PARALLEL) @@ -5929,6 +5959,8 @@ store_expr (tree exp, rtx target, int call_param_p, emit_move_insn (target, temp); } } + else + gcc_assert (!shortened_string_cst); return NULL_RTX; } -- cgit v1.1 From dc8c02ca1cd18f8c22d70cf17b47125fc25ab243 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sun, 31 May 2020 10:45:21 +0200 Subject: expr: Fix fallout from optimize store_expr from STRING_CST [PR95052] > Can't hurt, and debugging the assert tripping is likely a hell of a lot easier > than debugging the resultant incorrect code. So if it passes, then I'd say go > for it. Testing passed, so I've committed it with those asserts (and thankfully I've added them!) but it apparently broke Linux kernel build on arm. 
The problem is that if the STRING_CST is very short, while the full object has BLKmode, the short string could very well have QImode/HImode/SImode/DImode and in that case it wouldn't take the path that copies the string and then clears the remaining space, but different paths in which it will ICE because of those asserts and without those it would just emit wrong-code. The following patch fixes it by enforcing BLKmode for the string MEM, even if it is short, so that we copy it and memset the rest. 2020-05-31 Jakub Jelinek PR middle-end/95052 * expr.c (store_expr): For shortened_string_cst, ensure temp has BLKmode. * gcc.dg/pr95052.c: New test. --- gcc/expr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'gcc/expr.c') diff --git a/gcc/expr.c b/gcc/expr.c index 049d3ed..6b75028 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -5779,6 +5779,11 @@ store_expr (tree exp, rtx target, int call_param_p, (call_param_p ? EXPAND_STACK_PARM : EXPAND_NORMAL), &alt_rtl, false); + if (shortened_string_cst) + { + gcc_assert (MEM_P (temp)); + temp = change_address (temp, BLKmode, NULL_RTX); + } } /* If TEMP is a VOIDmode constant and the mode of the type of EXP is not -- cgit v1.1 From 9a182ef9ee011935d827ab5c6c9a7cd8e22257d8 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Fri, 5 Jun 2020 10:34:59 +0100 Subject: expand: Simplify removing subregs when expanding a copy [PR95254] In rtl expand, if we have a copy that matches one of the following patterns: (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...))) (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR)) (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...))) (set (subreg:M1 (reg:M2 ...)) (constant C)) where mode M1 is equal in size to M2, try to detect whether the mode change involves an implicit round trip through memory. If so, see if we can avoid that by removing the subregs and doing the move in mode M2 instead.
2020-06-05 Felix Yang gcc/ PR target/95254 * expr.c (emit_move_insn): Check src and dest of the copy to see if one or both of them are subregs, try to remove the subregs when innermode and outermode are equal in size and the mode change involves an implicit round trip through memory. gcc/testsuite/ PR target/95254 * gcc.target/aarch64/pr95254.c: New test. * gcc.target/i386/pr67609.c: Check "movq\t%xmm0" instead of "movdqa". --- gcc/expr.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'gcc/expr.c') diff --git a/gcc/expr.c b/gcc/expr.c index 6b75028..ca6b1c1 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -3814,6 +3814,80 @@ emit_move_insn (rtx x, rtx y) gcc_assert (mode != BLKmode && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode)); + /* If we have a copy that looks like one of the following patterns: + (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...))) + (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR)) + (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...))) + (set (subreg:M1 (reg:M2 ...)) (constant C)) + where mode M1 is equal in size to M2, try to detect whether the + mode change involves an implicit round trip through memory. + If so, see if we can avoid that by removing the subregs and + doing the move in mode M2 instead. */ + + rtx x_inner = NULL_RTX; + rtx y_inner = NULL_RTX; + + auto candidate_subreg_p = [&](rtx subreg) { + return (REG_P (SUBREG_REG (subreg)) + && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))), + GET_MODE_SIZE (GET_MODE (subreg))) + && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg))) + != CODE_FOR_nothing); + }; + + auto candidate_mem_p = [&](machine_mode innermode, rtx mem) { + return (!targetm.can_change_mode_class (innermode, GET_MODE (mem), ALL_REGS) + && !push_operand (mem, GET_MODE (mem)) + /* Not a candiate if innermode requires too much alignment. 
*/ + && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode) + || targetm.slow_unaligned_access (GET_MODE (mem), + MEM_ALIGN (mem)) + || !targetm.slow_unaligned_access (innermode, + MEM_ALIGN (mem)))); + }; + + if (SUBREG_P (x) && candidate_subreg_p (x)) + x_inner = SUBREG_REG (x); + + if (SUBREG_P (y) && candidate_subreg_p (y)) + y_inner = SUBREG_REG (y); + + if (x_inner != NULL_RTX + && y_inner != NULL_RTX + && GET_MODE (x_inner) == GET_MODE (y_inner) + && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS)) + { + x = x_inner; + y = y_inner; + mode = GET_MODE (x_inner); + } + else if (x_inner != NULL_RTX + && MEM_P (y) + && candidate_mem_p (GET_MODE (x_inner), y)) + { + x = x_inner; + y = adjust_address (y, GET_MODE (x_inner), 0); + mode = GET_MODE (x_inner); + } + else if (y_inner != NULL_RTX + && MEM_P (x) + && candidate_mem_p (GET_MODE (y_inner), x)) + { + x = adjust_address (x, GET_MODE (y_inner), 0); + y = y_inner; + mode = GET_MODE (y_inner); + } + else if (x_inner != NULL_RTX + && CONSTANT_P (y) + && !targetm.can_change_mode_class (GET_MODE (x_inner), + mode, ALL_REGS) + && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0))) + { + x = x_inner; + y = y_inner; + mode = GET_MODE (x_inner); + } + if (CONSTANT_P (y)) { if (optimize -- cgit v1.1 From 502d63b6d6141597bb18fd23c87736a1b384cf8f Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Mon, 9 Mar 2020 13:23:03 +0100 Subject: Lower VEC_COND_EXPR into internal functions. gcc/ChangeLog: * Makefile.in: Add new file. * expr.c (expand_expr_real_2): Add gcc_unreachable as we should not meet this condition. (do_store_flag): Likewise. * gimplify.c (gimplify_expr): Gimplify first argument of VEC_COND_EXPR to be a SSA name. * internal-fn.c (vec_cond_mask_direct): New. (vec_cond_direct): Likewise. (vec_condu_direct): Likewise. (vec_condeq_direct): Likewise. (expand_vect_cond_optab_fn): New. (expand_vec_cond_optab_fn): Likewise. (expand_vec_condu_optab_fn): Likewise. 
(expand_vec_condeq_optab_fn): Likewise. (expand_vect_cond_mask_optab_fn): Likewise. (expand_vec_cond_mask_optab_fn): Likewise. (direct_vec_cond_mask_optab_supported_p): Likewise. (direct_vec_cond_optab_supported_p): Likewise. (direct_vec_condu_optab_supported_p): Likewise. (direct_vec_condeq_optab_supported_p): Likewise. * internal-fn.def (VCOND): New OPTAB. (VCONDU): Likewise. (VCONDEQ): Likewise. (VCOND_MASK): Likewise. * optabs.c (get_rtx_code): Make it global. (expand_vec_cond_mask_expr): Removed. (expand_vec_cond_expr): Removed. * optabs.h (expand_vec_cond_expr): Likewise. (vector_compare_rtx): Make it global. * passes.def: Add new pass_gimple_isel pass. * tree-cfg.c (verify_gimple_assign_ternary): Add check for VEC_COND_EXPR about first argument. * tree-pass.h (make_pass_gimple_isel): New. * tree-ssa-forwprop.c (pass_forwprop::execute): Prevent propagation of the first argument of a VEC_COND_EXPR. * tree-ssa-reassoc.c (ovce_extract_ops): Support SSA_NAME as first argument of a VEC_COND_EXPR. (optimize_vec_cond_expr): Likewise. * tree-vect-generic.c (expand_vector_divmod): Make SSA_NAME for a first argument of created VEC_COND_EXPR. (expand_vector_condition): Fix coding style. * tree-vect-stmts.c (vectorizable_condition): Gimplify first argument. * gimple-isel.cc: New file. gcc/testsuite/ChangeLog: * g++.dg/vect/vec-cond-expr-eh.C: New test. --- gcc/expr.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) (limited to 'gcc/expr.c') diff --git a/gcc/expr.c b/gcc/expr.c index ca6b1c1..3c68b0d 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -9316,17 +9316,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, if (temp != 0) return temp; - /* For vector MIN <x, y>, expand it a VEC_COND_EXPR <x <= y, x, y> - and similarly for MAX <x, y>. */ if (VECTOR_TYPE_P (type)) - { - tree t0 = make_tree (type, op0); - tree t1 = make_tree (type, op1); - tree comparison = build2 (code == MIN_EXPR ?
LE_EXPR : GE_EXPR, - type, t0, t1); - return expand_vec_cond_expr (type, comparison, t0, t1, - original_target); - } + gcc_unreachable (); /* At this point, a MEM target is no longer useful; we will get better code without it. */ @@ -9915,10 +9906,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, return temp; } - case VEC_COND_EXPR: - target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target); - return target; - case VEC_DUPLICATE_EXPR: op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier); target = expand_vector_broadcast (mode, op0); @@ -12249,8 +12236,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode) STRIP_NOPS (arg1); /* For vector typed comparisons emit code to generate the desired - all-ones or all-zeros mask. Conveniently use the VEC_COND_EXPR - expander for this. */ + all-ones or all-zeros mask. */ if (TREE_CODE (ops->type) == VECTOR_TYPE) { tree ifexp = build2 (ops->code, ops->type, arg0, arg1); @@ -12258,12 +12244,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode) && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code)) return expand_vec_cmp_expr (ops->type, ifexp, target); else - { - tree if_true = constant_boolean_node (true, ops->type); - tree if_false = constant_boolean_node (false, ops->type); - return expand_vec_cond_expr (ops->type, ifexp, if_true, - if_false, target); - } + gcc_unreachable (); } /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial -- cgit v1.1 From 760df6d296b8fc59796f42dca5eb14012fbfa28b Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 8 Jul 2020 15:01:14 +0100 Subject: expr: Fix REDUCE_BIT_FIELD for constants [PR95694] This is yet another PR caused by constant integer rtxes not storing a mode. We were calling REDUCE_BIT_FIELD on a constant integer that didn't fit in poly_int64, and then tripped the as_a assert on VOIDmode. 
AFAICT REDUCE_BIT_FIELD is always passed rtxes that have TYPE_MODE (rather than some other mode) and it just fills in the redundant sign bits of that TYPE_MODE value. So it should be safe to get the mode from the type instead of the rtx. The patch does that and asserts that the modes agree, where information is available. That on its own is enough to fix the bug, but we might as well extend the folding case to all constant integers, not just those that fit poly_int64. gcc/ PR middle-end/95694 * expr.c (expand_expr_real_2): Get the mode from the type rather than the rtx, and assert that it is consistent with the mode of the rtx (where known). Optimize all constant integers, not just those that can be represented in poly_int64. gcc/testsuite/ PR middle-end/95694 * gcc.dg/pr95694.c: New test. --- gcc/expr.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'gcc/expr.c') diff --git a/gcc/expr.c b/gcc/expr.c index 3c68b0d..715edae 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -11525,26 +11525,27 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode, static rtx reduce_to_bit_field_precision (rtx exp, rtx target, tree type) { + scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type); HOST_WIDE_INT prec = TYPE_PRECISION (type); - if (target && GET_MODE (target) != GET_MODE (exp)) + gcc_assert (GET_MODE (exp) == VOIDmode || GET_MODE (exp) == mode); + if (target && GET_MODE (target) != mode) target = 0; - /* For constant values, reduce using build_int_cst_type. */ - poly_int64 const_exp; - if (poly_int_rtx_p (exp, &const_exp)) + + /* For constant values, reduce using wide_int_to_tree. 
*/ + if (poly_int_rtx_p (exp)) { - tree t = build_int_cst_type (type, const_exp); + auto value = wi::to_poly_wide (exp, mode); + tree t = wide_int_to_tree (type, value); return expand_expr (t, target, VOIDmode, EXPAND_NORMAL); } else if (TYPE_UNSIGNED (type)) { - scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (exp)); rtx mask = immed_wide_int_const (wi::mask (prec, false, GET_MODE_PRECISION (mode)), mode); return expand_and (mode, exp, mask, target); } else { - scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (exp)); int count = GET_MODE_PRECISION (mode) - prec; exp = expand_shift (LSHIFT_EXPR, mode, exp, count, target, 0); return expand_shift (RSHIFT_EXPR, mode, exp, count, target, 0); -- cgit v1.1 From 510125d2272175f47b26227fbe9b8c8c5abfd988 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 10 Jul 2020 19:06:46 +0100 Subject: expr: Move reduce_bit_field target mode check [PR96151] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some cases, expand_expr_real_2 prefers to use the mode of the caller-suggested target instead of the mode of the expression when passing values to reduce_to_bit_field_precision. E.g.: else if (target == 0) op0 = convert_to_mode (mode, op0, TYPE_UNSIGNED (TREE_TYPE (treeop0))); else { convert_move (target, op0, TYPE_UNSIGNED (TREE_TYPE (treeop0))); op0 = target; } where “op0” might not have “mode” for the “else” branch, but does for all the others. reduce_to_bit_field_precision discards the suggested target if it has the wrong mode. This patch moves that to expand_expr_real_2 instead (conditional on reduce_bit_field). gcc/ PR middle-end/96151 * expr.c (expand_expr_real_2): When reducing bit fields, clear the target if it has a different mode from the expression. (reduce_to_bit_field_precision): Don't do that here. Instead assert that the target already has the correct mode.
--- gcc/expr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'gcc/expr.c') diff --git a/gcc/expr.c b/gcc/expr.c index 715edae..c7c3e9f 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -8664,7 +8664,9 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, reduce_bit_field = (INTEGRAL_TYPE_P (type) && !type_has_mode_precision_p (type)); - if (reduce_bit_field && modifier == EXPAND_STACK_PARM) + if (reduce_bit_field + && (modifier == EXPAND_STACK_PARM + || (target && GET_MODE (target) != mode))) target = 0; /* Use subtarget as the target for operand 0 of a binary operation. */ @@ -11527,9 +11529,8 @@ reduce_to_bit_field_precision (rtx exp, rtx target, tree type) { scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type); HOST_WIDE_INT prec = TYPE_PRECISION (type); - gcc_assert (GET_MODE (exp) == VOIDmode || GET_MODE (exp) == mode); - if (target && GET_MODE (target) != mode) - target = 0; + gcc_assert ((GET_MODE (exp) == VOIDmode || GET_MODE (exp) == mode) + && (!target || GET_MODE (target) == mode)); /* For constant values, reduce using wide_int_to_tree. */ if (poly_int_rtx_p (exp)) -- cgit v1.1