Diffstat (limited to 'gcc')
55 files changed, 1241 insertions, 150 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d239c35..cbce913 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,114 @@ +2025-05-07 Jeff Law <jlaw@ventanamicro.com> + + PR target/120137 + PR target/120154 + * config/riscv/riscv-vect-permconst.cc (process_bb): Verify each + canonicalized element fits into the vector element mode. + +2025-05-07 Dongyan Chen <chendongyan@isrc.iscas.ac.cn> + + * common/config/riscv/riscv-common.cc: New extension. + * config/riscv/riscv.opt: Ditto. + +2025-05-07 Richard Earnshaw <rearnsha@arm.com> + + PR target/91323 + * config/arm/arm.cc (arm_select_cc_mode): Use CCFPEmode for LTGT. + +2025-05-07 Richard Earnshaw <rearnsha@arm.com> + + PR target/110796 + PR target/118446 + * config/arm/arm.h (REVERSIBLE_CC_MODE): FP modes are only + reversible if flag_finite_math_only. + * config/arm/arm.cc (arm_select_cc_mode): Return CCFPmode for all + FP comparisons if flag_finite_math_only. + +2025-05-07 Andrew Pinski <quic_apinski@quicinc.com> + + PR tree-optimization/111276 + * gimple-fold.cc (arith_code_with_undefined_signed_overflow): Make static. + (gimple_with_undefined_signed_overflow): New function. + * gimple-fold.h (arith_code_with_undefined_signed_overflow): Remove. + (gimple_with_undefined_signed_overflow): Add declaration. + * tree-if-conv.cc (if_convertible_gimple_assign_stmt_p): Use + gimple_with_undefined_signed_overflow instead of manually + checking lhs and the code of the stmt. + (predicate_statements): Likewise. + * tree-ssa-ifcombine.cc (ifcombine_rewrite_to_defined_overflow): Likewise. + * tree-ssa-loop-im.cc (move_computations_worker): Likewise. + * tree-ssa-reassoc.cc (update_range_test): Likewise. Reformat. + * tree-scalar-evolution.cc (final_value_replacement_loop): Use + gimple_with_undefined_signed_overflow instead of + arith_code_with_undefined_signed_overflow. + * tree-ssa-loop-split.cc (split_loop): Likewise. + +2025-05-07 Andrew Pinski <quic_apinski@quicinc.com> + + * tree-ssa-loop-im.cc (compute_invariantness): Hoist to the always executed point + if ignoring the cost. + +2025-05-07 Jan Hubicka <hubicka@ucw.cz> + + * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Add FLOAT_EXPR, + FIX_TRUNC_EXPR and vec_promote_demote costs. + +2025-05-07 Jennifer Schmitz <jschmitz@nvidia.com> + + PR target/117978 + * config/aarch64/aarch64-protos.h: Declare + aarch64_emit_load_store_through_mode and aarch64_expand_maskloadstore. + * config/aarch64/aarch64-sve.md + (maskload<mode><vpred>): New define_expand folding maskloads with + certain predicate patterns to ASIMD loads. + (*aarch64_maskload<mode><vpred>): Renamed from maskload<mode><vpred>. + (maskstore<mode><vpred>): New define_expand folding maskstores with + certain predicate patterns to ASIMD stores. + (*aarch64_maskstore<mode><vpred>): Renamed from maskstore<mode><vpred>. + * config/aarch64/aarch64.cc + (aarch64_emit_load_store_through_mode): New function emitting a + load/store through subregs of a given mode. + (aarch64_emit_sve_pred_move): Refactor to use + aarch64_emit_load_store_through_mode. + (aarch64_expand_maskloadstore): New function to emit ASIMD loads/stores + for maskloads/stores with SVE predicates with VL1, VL2, VL4, VL8, or + VL16 patterns. + (aarch64_partial_ptrue_length): New function returning number of leading + set bits in a predicate. + +2025-05-07 Stefan Schulze Frielinghaus <stefansf@gcc.gnu.org> + + * config/s390/s390-protos.h (s390_expand_cstoreti4): New + function. + * config/s390/s390.cc (s390_expand_cstoreti4): New function. 
+ * config/s390/s390.md (CC_SUZ): New mode iterator. + (l): New mode attribute. + (cc_tolower): New mode attribute. + * config/s390/vector.md (cstoreti4): New expander. + (*vec_cmpv2di_lane0_<cc_tolower>): New insn. + (*vec_cmpti_<cc_tolower>): New insn. + +2025-05-07 H.J. Lu <hjl.tools@gmail.com> + + PR target/120036 + * config/i386/i386-features.cc (ix86_get_vector_load_mode): + Handle 8/4/2 bytes. + (remove_redundant_vector_load): If the mode size is smaller than + its natural size, first insert an extra move with a QI vector + SUBREG of the same size to avoid validate_subreg failure. + +2025-05-07 hongtao.liu <hongtao.liu@intel.com> + + PR gcov-profile/118508 + * auto-profile.cc + (autofdo_source_profile::get_callsite_total_count): Fix name + mismatch for fortran. + +2025-05-07 Jeff Law <jlaw@ventanamicro.com> + + * config/riscv/riscv.md (*branch<ANYI:mode>_shiftedarith_equals_zero): + Avoid generating unnecessary andi. Fix formatting. + 2025-05-06 Dongyan Chen <chendongyan@isrc.iscas.ac.cn> * common/config/riscv/riscv-common.cc: New extension. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 9687431..8cb3c2b 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20250507 +20250508 diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 0233e1a..ca14eb9 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -327,6 +327,7 @@ static const struct riscv_ext_version riscv_ext_version_table[] = {"zalrsc", ISA_SPEC_CLASS_NONE, 1, 0}, {"zabha", ISA_SPEC_CLASS_NONE, 1, 0}, {"zacas", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zama16b", ISA_SPEC_CLASS_NONE, 1, 0}, {"zba", ISA_SPEC_CLASS_NONE, 1, 0}, {"zbb", ISA_SPEC_CLASS_NONE, 1, 0}, @@ -1657,6 +1658,7 @@ static const riscv_ext_flag_table_t riscv_ext_flag_table[] = RISCV_EXT_FLAG_ENTRY ("zalrsc", x_riscv_za_subext, MASK_ZALRSC), RISCV_EXT_FLAG_ENTRY ("zabha", x_riscv_za_subext, MASK_ZABHA), RISCV_EXT_FLAG_ENTRY ("zacas", x_riscv_za_subext, MASK_ZACAS), + RISCV_EXT_FLAG_ENTRY ("zama16b", x_riscv_za_subext, MASK_ZAMA16B), RISCV_EXT_FLAG_ENTRY ("zba", x_riscv_zb_subext, MASK_ZBA), RISCV_EXT_FLAG_ENTRY ("zbb", x_riscv_zb_subext, MASK_ZBB), diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 1ca86c9..c935e7b 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1026,6 +1026,8 @@ rtx aarch64_ptrue_reg (machine_mode, unsigned int); rtx aarch64_ptrue_reg (machine_mode, machine_mode); rtx aarch64_pfalse_reg (machine_mode); bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *); +void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode); +bool aarch64_expand_maskloadstore (rtx *, machine_mode); void aarch64_emit_sve_pred_move (rtx, rtx, rtx); void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode); bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx); @@ -1053,6 +1055,7 @@ void aarch64_subvti_scratch_regs (rtx, rtx, rtx *, rtx *, rtx *, rtx *); void aarch64_expand_subvti (rtx, rtx, rtx, rtx, rtx, rtx, rtx, bool); +int aarch64_exact_log2_inverse (unsigned int, rtx); /* Initialize builtins for SIMD intrinsics. 
*/ diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e2afe87..1099e74 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1193,12 +1193,14 @@ (define_insn "aarch64_simd_vec_set_zero<mode>" [(set (match_operand:VALL_F16 0 "register_operand" "=w") (vec_merge:VALL_F16 - (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "") - (match_operand:VALL_F16 3 "register_operand" "0") + (match_operand:VALL_F16 1 "register_operand" "0") + (match_operand:VALL_F16 3 "aarch64_simd_imm_zero" "") (match_operand:SI 2 "immediate_operand" "i")))] - "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0" + "TARGET_SIMD && aarch64_exact_log2_inverse (<nunits>, operands[2]) >= 0" { - int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); + int elt = ENDIAN_LANE_N (<nunits>, + aarch64_exact_log2_inverse (<nunits>, + operands[2])); operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); return "ins\\t%0.<Vetype>[%p2], <vwcore>zr"; } diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 7bf12ff..f39af6e 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -1286,7 +1286,24 @@ ;; ------------------------------------------------------------------------- ;; Predicated LD1 (single). -(define_insn "maskload<mode><vpred>" +(define_expand "maskload<mode><vpred>" + [(set (match_operand:SVE_ALL 0 "register_operand") + (unspec:SVE_ALL + [(match_operand:<VPRED> 2 "nonmemory_operand") + (match_operand:SVE_ALL 1 "memory_operand") + (match_operand:SVE_ALL 3 "aarch64_maskload_else_operand")] + UNSPEC_LD1_SVE))] + "TARGET_SVE" + { + if (aarch64_expand_maskloadstore (operands, <MODE>mode)) + DONE; + if (CONSTANT_P (operands[2])) + operands[2] = force_reg (<VPRED>mode, operands[2]); + } +) + +;; Predicated LD1 (single). +(define_insn "*aarch64_maskload<mode><vpred>" [(set (match_operand:SVE_ALL 0 "register_operand" "=w") (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl") @@ -2287,7 +2304,24 @@ ;; ------------------------------------------------------------------------- ;; Predicated ST1 (single). -(define_insn "maskstore<mode><vpred>" +(define_expand "maskstore<mode><vpred>" + [(set (match_operand:SVE_ALL 0 "memory_operand") + (unspec:SVE_ALL + [(match_operand:<VPRED> 2 "nonmemory_operand") + (match_operand:SVE_ALL 1 "register_operand") + (match_dup 0)] + UNSPEC_ST1_SVE))] + "TARGET_SVE" + { + if (aarch64_expand_maskloadstore (operands, <MODE>mode)) + DONE; + if (CONSTANT_P (operands[2])) + operands[2] = force_reg (<VPRED>mode, operands[2]); + } +) + +;; Predicated ST1 (single). +(define_insn "*aarch64_maskstore<mode><vpred>" [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl") diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index fff8d9d..9e3f288 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -3667,6 +3667,14 @@ aarch64_partial_ptrue_length (rtx_vector_builder &builder, if (builder.nelts_per_pattern () == 3) return 0; + /* It is conservatively correct to drop the element size to a lower value, + and we must do so if the predicate consists of a leading "foreground" + sequence that is smaller than the element size. Without this, + we would test only one bit and so treat everything as either an + all-true or an all-false predicate. 
*/ + if (builder.nelts_per_pattern () == 2) + elt_size = MIN (elt_size, builder.npatterns ()); + /* Skip over leading set bits. */ unsigned int nelts = builder.encoded_nelts (); unsigned int i = 0; @@ -3698,6 +3706,24 @@ aarch64_partial_ptrue_length (rtx_vector_builder &builder, return vl; } +/* Return: + + * -1 if all bits of PRED are set + * N if PRED has N leading set bits followed by all clear bits + * 0 if PRED does not have any of these forms. */ + +int +aarch64_partial_ptrue_length (rtx pred) +{ + rtx_vector_builder builder; + if (!aarch64_get_sve_pred_bits (builder, pred)) + return 0; + + auto elt_size = vector_element_size (GET_MODE_BITSIZE (GET_MODE (pred)), + GET_MODE_NUNITS (GET_MODE (pred))); + return aarch64_partial_ptrue_length (builder, elt_size); +} + /* See if there is an svpattern that encodes an SVE predicate of mode PRED_MODE in which the first VL bits are set and the rest are clear. Return the pattern if so, otherwise return AARCH64_NUM_SVPATTERNS. @@ -6410,8 +6436,32 @@ aarch64_stack_protect_canary_mem (machine_mode mode, rtx decl_rtl, return gen_rtx_MEM (mode, force_reg (Pmode, addr)); } -/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate - that is known to contain PTRUE. */ +/* Emit a load/store from a subreg of SRC to a subreg of DEST. + The subregs have mode NEW_MODE. Use only for reg<->mem moves. */ +void +aarch64_emit_load_store_through_mode (rtx dest, rtx src, machine_mode new_mode) +{ + gcc_assert ((MEM_P (dest) && register_operand (src, VOIDmode)) + || (MEM_P (src) && register_operand (dest, VOIDmode))); + auto mode = GET_MODE (dest); + auto int_mode = aarch64_sve_int_mode (mode); + if (MEM_P (src)) + { + rtx tmp = force_reg (new_mode, adjust_address (src, new_mode, 0)); + tmp = force_lowpart_subreg (int_mode, tmp, new_mode); + emit_move_insn (dest, force_lowpart_subreg (mode, tmp, int_mode)); + } + else + { + src = force_lowpart_subreg (int_mode, src, mode); + emit_move_insn (adjust_address (dest, new_mode, 0), + force_lowpart_subreg (new_mode, src, int_mode)); + } +} + +/* PRED is a predicate that is known to contain PTRUE. + For 128-bit VLS loads/stores, emit LDR/STR. + Else, emit an SVE predicated move from SRC to DEST. */ void aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src) @@ -6421,16 +6471,7 @@ aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src) && known_eq (GET_MODE_SIZE (mode), 16) && aarch64_classify_vector_mode (mode) == VEC_SVE_DATA && !BYTES_BIG_ENDIAN) - { - if (MEM_P (src)) - { - rtx tmp = force_reg (V16QImode, adjust_address (src, V16QImode, 0)); - emit_move_insn (dest, lowpart_subreg (mode, tmp, V16QImode)); - } - else - emit_move_insn (adjust_address (dest, V16QImode, 0), - force_lowpart_subreg (V16QImode, src, mode)); - } + aarch64_emit_load_store_through_mode (dest, src, V16QImode); else { expand_operand ops[3]; @@ -23526,6 +23567,39 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info, return false; } +/* Try to optimize the expansion of a maskload or maskstore with + the operands in OPERANDS, given that the vector being loaded or + stored has mode MODE. Return true on success or false if the normal + expansion should be used. */ + +bool +aarch64_expand_maskloadstore (rtx *operands, machine_mode mode) +{ + /* If the predicate in operands[2] is a patterned SVE PTRUE predicate + with patterns VL1, VL2, VL4, VL8, or VL16 and at most the bottom + 128 bits are loaded/stored, emit an ASIMD load/store. 
*/ + int vl = aarch64_partial_ptrue_length (operands[2]); + int width = vl * GET_MODE_UNIT_BITSIZE (mode); + if (width <= 128 + && pow2p_hwi (vl) + && (vl == 1 + || (!BYTES_BIG_ENDIAN + && aarch64_classify_vector_mode (mode) == VEC_SVE_DATA))) + { + machine_mode new_mode; + if (known_eq (width, 128)) + new_mode = V16QImode; + else if (known_eq (width, 64)) + new_mode = V8QImode; + else + new_mode = int_mode_for_size (width, 0).require (); + aarch64_emit_load_store_through_mode (operands[0], operands[1], + new_mode); + return true; + } + return false; +} + /* Return true if OP is a valid SIMD move immediate for SVE or AdvSIMD. */ bool aarch64_simd_valid_mov_imm (rtx op) @@ -23840,6 +23914,16 @@ aarch64_strided_registers_p (rtx *operands, unsigned int num_operands, return true; } +/* Return the base 2 logarithm of the bit inverse of OP masked by the lowest + NELTS bits, if OP is a power of 2. Otherwise, returns -1. */ + +int +aarch64_exact_log2_inverse (unsigned int nelts, rtx op) +{ + return exact_log2 ((~INTVAL (op)) + & ((HOST_WIDE_INT_1U << nelts) - 1)); +} + /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). */ void diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 670f487..6bdb68a 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -16211,14 +16211,16 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y) case UNGT: case UNGE: case UNEQ: - case LTGT: return CCFPmode; case LT: case LE: case GT: case GE: - return CCFPEmode; + case LTGT: + return (flag_finite_math_only + ? CCFPmode + : CCFPEmode); default: gcc_unreachable (); diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 8472b75..08d3f0d 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -2257,7 +2257,11 @@ extern int making_const_table; #define SELECT_CC_MODE(OP, X, Y) arm_select_cc_mode (OP, X, Y) -#define REVERSIBLE_CC_MODE(MODE) 1 +/* Floating-point modes cannot be reversed unless we don't care about + NaNs. */ +#define REVERSIBLE_CC_MODE(MODE) \ + (flag_finite_math_only \ + || !((MODE) == CCFPmode || (MODE) == CCFPEmode)) #define REVERSE_CONDITION(CODE,MODE) \ (((MODE) == CCFPmode || (MODE) == CCFPEmode) \ diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 31f3ee2..1ba5ac4 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3309,8 +3309,16 @@ ix86_get_vector_load_mode (unsigned int size) mode = V64QImode; else if (size == 32) mode = V32QImode; - else + else if (size == 16) mode = V16QImode; + else if (size == 8) + mode = V8QImode; + else if (size == 4) + mode = V4QImode; + else if (size == 2) + mode = V2QImode; + else + gcc_unreachable (); return mode; } @@ -3338,13 +3346,36 @@ replace_vector_const (machine_mode vector_mode, rtx vector_const, if (SUBREG_P (dest) || mode == vector_mode) replace = vector_const; else - replace = gen_rtx_SUBREG (mode, vector_const, 0); + { + unsigned int size = GET_MODE_SIZE (mode); + if (size < ix86_regmode_natural_size (mode)) + { + /* If the mode size is smaller than its natural size, + first insert an extra move with a QI vector SUBREG + of the same size to avoid validate_subreg failure. 
*/ + machine_mode vmode = ix86_get_vector_load_mode (size); + rtx vreg; + if (mode == vmode) + vreg = vector_const; + else + { + vreg = gen_reg_rtx (vmode); + rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0); + rtx pat = gen_rtx_SET (vreg, vsubreg); + rtx_insn *vinsn = emit_insn_before (pat, insn); + df_insn_rescan (vinsn); + } + replace = gen_rtx_SUBREG (mode, vreg, 0); + } + else + replace = gen_rtx_SUBREG (mode, vector_const, 0); + } - /* NB: Don't run recog_memoized here since vector SUBREG may not - be valid. Let LRA handle vector SUBREG. */ SET_SRC (set) = replace; /* Drop possible dead definitions. */ PATTERN (insn) = set; + INSN_CODE (insn) = -1; + recog_memoized (insn); df_insn_rescan (insn); } } diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index bef95ea..fd36ea8 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -25767,6 +25767,26 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, (ix86_tune_cost, GET_MODE_BITSIZE (mode)); break; + case FLOAT_EXPR: + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + stmt_cost = ix86_cost->cvtsi2ss; + else if (X87_FLOAT_MODE_P (mode)) + /* TODO: We do not have cost tables for x87. */ + stmt_cost = ix86_cost->fadd; + else + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps); + break; + + case FIX_TRUNC_EXPR: + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + stmt_cost = ix86_cost->cvtss2si; + else if (X87_FLOAT_MODE_P (mode)) + /* TODO: We do not have cost tables for x87. */ + stmt_cost = ix86_cost->fadd; + else + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi); + break; + case COND_EXPR: { /* SSE2 conditional move sequence is: @@ -25930,8 +25950,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, break; } - if (kind == vec_promote_demote - && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) + if (kind == vec_promote_demote) { int outer_size = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)))); int inner_size = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))); - int stmt_cost = vec_fp_conversion_cost - (ix86_tune_cost, GET_MODE_BITSIZE (mode)); - /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end - up doing two conversions and packing them. */ + bool inner_fp = FLOAT_TYPE_P + (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))); + + if (fp && inner_fp) + stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + else if (fp && !inner_fp) + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps); + else if (!fp && inner_fp) + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi); + else + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + /* VEC_PACK_TRUNC_EXPR and similar demote operations: If inner size is + greater than outer size we will end up doing two conversions and + packing them. We always pack pairs; if the size difference is greater + it is split into multiple demote operations. 
*/ if (inner_size > outer_size) - { - int n = inner_size / outer_size; - stmt_cost = stmt_cost * n - + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op); - } + stmt_cost = stmt_cost * 2 + + ix86_vec_cost (mode, ix86_cost->sse_op); } /* If we do elementwise loads into a vector then we are bound by diff --git a/gcc/config/riscv/riscv-vect-permconst.cc b/gcc/config/riscv/riscv-vect-permconst.cc index feecc7e..8e13cf8 100644 --- a/gcc/config/riscv/riscv-vect-permconst.cc +++ b/gcc/config/riscv/riscv-vect-permconst.cc @@ -203,6 +203,24 @@ vector_permconst::process_bb (basic_block bb) if (bias < 0 || bias > 16384 / 8) continue; + /* We need to verify that each element would be a valid value + in the inner mode after applying the bias. */ + machine_mode inner = GET_MODE_INNER (GET_MODE (cvec)); + HOST_WIDE_INT precision = GET_MODE_PRECISION (inner).to_constant (); + int i; + for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++) + { + HOST_WIDE_INT val = INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias; + if (val != sext_hwi (val, precision)) + break; + } + + /* If the loop terminated early, then we found a case where the + adjusted constant would not fit, so we can't record the constant + for this case (it's unlikely to be useful anyway). */ + if (i != CONST_VECTOR_NUNITS (cvec).to_constant ()) + continue; + /* At this point we have a load of a constant integer vector from the constant pool. That constant integer vector is hopefully a permutation constant. We need to make a copy of the vector and @@ -211,7 +229,7 @@ vector_permconst::process_bb (basic_block bb) XXX This violates structure sharing conventions. */ rtvec_def *nvec = gen_rtvec (CONST_VECTOR_NUNITS (cvec).to_constant ()); - for (int i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++) + for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++) nvec->elem[i] = GEN_INT (INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias); rtx copy = gen_rtx_CONST_VECTOR (GET_MODE (cvec), nvec); diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 9e471be..80593ee 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -274,6 +274,8 @@ Mask(ZA64RS) Var(riscv_za_subext) Mask(ZA128RS) Var(riscv_za_subext) +Mask(ZAMA16B) Var(riscv_za_subext) + TargetVariable int riscv_zb_subext diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index e8c7f83..d760a7e 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -114,6 +114,7 @@ extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx); extern void s390_expand_vec_strlen (rtx, rtx, rtx); extern void s390_expand_vec_movstr (rtx, rtx, rtx); extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx); +extern void s390_expand_cstoreti4 (rtx, rtx, rtx, rtx); extern bool s390_expand_insv (rtx, rtx, rtx, rtx); extern void s390_expand_cs (machine_mode, rtx, rtx, rtx, rtx, rtx, bool); extern void s390_expand_atomic_exchange_tdsi (rtx, rtx, rtx); diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index e3edf85..2d44cec 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -7210,6 +7210,82 @@ s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count) NULL_RTX, 1, OPTAB_DIRECT); } +/* Expand optab cstoreti4. 
*/ + +void +s390_expand_cstoreti4 (rtx dst, rtx cmp, rtx op1, rtx op2) +{ + rtx_code code = GET_CODE (cmp); + + if (TARGET_VXE3) + { + rtx cond = s390_emit_compare (GET_MODE (cmp), code, op1, op2); + emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx)); + return; + } + + /* Prior to VXE3, emulate the comparison. For an (in)equality test exploit + VECTOR COMPARE EQUAL. For a relational test, first compare the high part + via VECTOR ELEMENT COMPARE (LOGICAL). If the high part does not equal, + then consume the CC immediately by a subsequent LOAD ON CONDITION. + Otherwise, if the high part equals, then perform a subsequent VECTOR + COMPARE HIGH LOGICAL followed by a LOAD ON CONDITION. */ + + op1 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0)); + op2 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0)); + + if (code == EQ || code == NE) + { + s390_expand_vec_compare_cc (dst, code, op1, op2, code == EQ); + return; + } + + /* Normalize code into either GE(U) or GT(U). */ + if (code == LT || code == LE || code == LTU || code == LEU) + { + std::swap (op1, op2); + code = swap_condition (code); + } + + /* For (un)signed comparisons + - high(op1) >= high(op2) instruction VECG op1, op2 sets CC1 + if the relation does _not_ hold. + - high(op1) > high(op2) instruction VECG op2, op1 sets CC1 + if the relation holds. */ + if (code == GT || code == GTU) + std::swap (op1, op2); + machine_mode cc_mode = (code == GEU || code == GTU) ? CCUmode : CCSmode; + rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); + emit_insn ( + gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM), + gen_rtx_COMPARE (cc_mode, + gen_rtx_VEC_SELECT (DImode, op1, lane0), + gen_rtx_VEC_SELECT (DImode, op2, lane0)))); + rtx ccs_reg = gen_rtx_REG (CCSmode, CC_REGNUM); + rtx lab = gen_label_rtx (); + s390_emit_jump (lab, gen_rtx_NE (VOIDmode, ccs_reg, const0_rtx)); + /* At this point we have that high(op1) == high(op2). Thus, test the low + part, now. For unsigned comparisons + - low(op1) >= low(op2) instruction VCHLGS op2, op1 sets CC1 + if the relation does _not_ hold. + - low(op1) > low(op2) instruction VCHLGS op1, op2 sets CC1 + if the relation holds. */ + std::swap (op1, op2); + emit_insn (gen_rtx_PARALLEL ( + VOIDmode, + gen_rtvec (2, + gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM), + gen_rtx_COMPARE (CCVIHUmode, op1, op2)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode))))); + emit_label (lab); + /* For (un)signed comparison >= any CC except CC1 means that the relation + holds. For (un)signed comparison > only CC1 means that the relation + holds. */ + rtx_code cmp_code = (code == GE || code == GEU) ? UNGE : LT; + rtx cond = gen_rtx_fmt_ee (cmp_code, CCSmode, ccs_reg, const0_rtx); + emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx)); +} + /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store the result in TARGET. */ @@ -7310,9 +7386,9 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond, /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into TARGET if either all (ALL_P is true) or any (ALL_P is false) of the elements in CMP1 and CMP2 fulfill the comparison. - This function is only used to emit patterns for the vx builtins and - therefore only handles comparison codes required by the - builtins. */ + This function is only used in s390_expand_cstoreti4 and to emit patterns for + the vx builtins and therefore only handles comparison codes required by + those. 
*/ void s390_expand_vec_compare_cc (rtx target, enum rtx_code code, rtx cmp1, rtx cmp2, bool all_p) diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 05b9da6..97a4bdf 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -993,6 +993,10 @@ (define_mode_attr asm_fcmp [(CCVEQ "e") (CCVFH "h") (CCVFHE "he")]) (define_mode_attr insn_cmp [(CCVEQ "eq") (CCVIH "h") (CCVIHU "hl") (CCVFH "h") (CCVFHE "he")]) +(define_mode_iterator CC_SUZ [CCS CCU CCZ]) +(define_mode_attr l [(CCS "") (CCU "l") (CCZ "")]) +(define_mode_attr cc_tolower [(CCS "ccs") (CCU "ccu") (CCZ "ccz")]) + ; Analogue to TOINTVEC / tointvec (define_mode_attr TOINT [(TF "TI") (DF "DI") (SF "SI")]) (define_mode_attr toint [(TF "ti") (DF "di") (SF "si")]) diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index e29255f..160e42a 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -538,6 +538,14 @@ "vlvg<bhfgq>\t%v0,%1,%Y4(%2)" [(set_attr "op_type" "VRS")]) +(define_expand "cstoreti4" + [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(match_operand:TI 2 "register_operand") + (match_operand:TI 3 "register_operand")]))] + "TARGET_VX" + "s390_expand_cstoreti4 (operands[0], operands[1], operands[2], operands[3]); DONE;") + ;; FIXME: Support also vector mode operands for 0 ;; This is used via RTL standard name as well as for expanding the builtin @@ -2209,6 +2217,28 @@ operands[5] = gen_reg_rtx (V2DImode); }) +(define_insn "*vec_cmpv2di_lane0_<cc_tolower>" + [(set (reg:CC_SUZ CC_REGNUM) + (compare:CC_SUZ + (vec_select:DI + (match_operand:V2DI 0 "register_operand" "v") + (parallel [(const_int 0)])) + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "v") + (parallel [(const_int 0)]))))] + "TARGET_VX" + "vec<l>g\t%v0,%v1" + [(set_attr "op_type" "VRR")]) + +(define_insn "*vec_cmpti_<cc_tolower>" + [(set (reg:CC_SUZ CC_REGNUM) + (compare:CC_SUZ + (match_operand:TI 0 "register_operand" "v") + (match_operand:TI 1 "register_operand" "v")))] + "TARGET_VXE3" + "vec<l>q\t%v0,%v1" + [(set_attr "op_type" "VRR")]) + ;; ;; Floating point compares diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index e6d9fa6..d92b9d6 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,11 @@ +2025-05-07 Paul Thomas <pault@gcc.gnu.org> + and Steven G. Kargl <kargl@gcc.gnu.org> + + PR fortran/119948 + * primary.cc (match_variable): Module procedures with sym the + same as result can be treated as variables, although marked + external. + 2025-05-06 Jerry DeLisle <jvdelisle@gcc.gnu.org> PR fortran/120049 diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 5884b79..7721795 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -10573,7 +10573,7 @@ gimple_fold_indirect_ref (tree t) integer types involves undefined behavior on overflow and the operation can be expressed with unsigned arithmetic. */ -bool +static bool arith_code_with_undefined_signed_overflow (tree_code code) { switch (code) @@ -10590,6 +10590,30 @@ arith_code_with_undefined_signed_overflow (tree_code code) } } +/* Return true if STMT has an operation that operates on a signed + integer type, involves undefined behavior on overflow, and + can be expressed with unsigned arithmetic. 
*/ + +bool +gimple_with_undefined_signed_overflow (gimple *stmt) +{ + if (!is_gimple_assign (stmt)) + return false; + tree lhs = gimple_assign_lhs (stmt); + if (!lhs) + return false; + tree lhs_type = TREE_TYPE (lhs); + if (!INTEGRAL_TYPE_P (lhs_type) + && !POINTER_TYPE_P (lhs_type)) + return false; + if (!TYPE_OVERFLOW_UNDEFINED (lhs_type)) + return false; + if (!arith_code_with_undefined_signed_overflow + (gimple_assign_rhs_code (stmt))) + return false; + return true; +} + /* Rewrite STMT, an assignment with a signed integer or pointer arithmetic operation that can be transformed to unsigned arithmetic by converting its operand, carrying out the operation in the corresponding unsigned diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h index 2790d0f..5fcfdcd 100644 --- a/gcc/gimple-fold.h +++ b/gcc/gimple-fold.h @@ -59,7 +59,7 @@ extern tree gimple_get_virt_method_for_vtable (HOST_WIDE_INT, tree, extern tree gimple_fold_indirect_ref (tree); extern bool gimple_fold_builtin_sprintf (gimple_stmt_iterator *); extern bool gimple_fold_builtin_snprintf (gimple_stmt_iterator *); -extern bool arith_code_with_undefined_signed_overflow (tree_code); +extern bool gimple_with_undefined_signed_overflow (gimple *); extern void rewrite_to_defined_overflow (gimple_stmt_iterator *); extern gimple_seq rewrite_to_defined_overflow (gimple *); extern void replace_call_with_value (gimple_stmt_iterator *, tree); diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index 7bcbe11..b34fd2f 100644 --- a/gcc/simplify-rtx.cc +++ b/gcc/simplify-rtx.cc @@ -7387,6 +7387,13 @@ simplify_context::simplify_ternary_operation (rtx_code code, machine_mode mode, return gen_rtx_CONST_VECTOR (mode, v); } + if (swap_commutative_operands_p (op0, op1) + /* If the two operands have the same precedence, the first bit of + the mask selects the first operand. */ + || (!swap_commutative_operands_p (op1, op0) && !(sel & 1))) + return simplify_gen_ternary (code, mode, mode, op1, op0, + GEN_INT (~sel & mask)); + /* Replace (vec_merge (vec_merge a b m) c n) with (vec_merge b c n) if no element from a appears in the result. */ if (GET_CODE (op0) == VEC_MERGE) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b2ebcef..73e8f7c 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,74 @@ +2025-05-07 Jeff Law <jlaw@ventanamicro.com> + + PR target/120137 + PR target/120154 + * gcc.target/riscv/pr120137.c: New test. + * gcc.target/riscv/pr120154.c: New test. + +2025-05-07 Dongyan Chen <chendongyan@isrc.iscas.ac.cn> + + * gcc.target/riscv/arch-48.c: New test. + +2025-05-07 Richard Earnshaw <rearnsha@arm.com> + + PR target/110796 + PR target/118446 + * gcc.target/arm/armv8_2-fp16-arith-1.c: Adjust due to no longer + emitting VCMPE when -ffast-math. + +2025-05-07 Jakub Jelinek <jakub@redhat.com> + + PR preprocessor/108900 + PR preprocessor/116047 + PR preprocessor/120061 + * gcc.dg/plugin/plugin.exp: Add location-overflow-test-pr116047.c + and location-overflow-test-pr120061.c. + * gcc.dg/plugin/location_overflow_plugin.cc (plugin_init): Don't error + on unknown values, instead just break. Handle 0x4fHHHHHH arguments + differently. + * gcc.dg/plugin/location-overflow-test-pr116047.c: New test. + * gcc.dg/plugin/location-overflow-test-pr116047-1.h: New test. + * gcc.dg/plugin/location-overflow-test-pr116047-2.h: New test. + * gcc.dg/plugin/location-overflow-test-pr120061.c: New test. + * gcc.dg/plugin/location-overflow-test-pr120061-1.h: New test. + * gcc.dg/plugin/location-overflow-test-pr120061-2.h: New test. 
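As a rough standalone illustration of the location_overflow_plugin.cc change listed in the entry above: the plugin remaps its GCC 14 32-bit location_t argument onto a 64-bit location_t. The helper name remap_base_location in the sketch below is invented for illustration; the arithmetic and the two sample values (0x4ffe0180 and 0x61000000) come from the plugin diff and its new tests further down.

/* Minimal sketch of the base_location remapping performed in plugin_init.  */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t location_t;

static location_t
remap_base_location (location_t base_location)
{
  /* Values just below the 32-bit LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES
     (0x50000000) keep their distance to that limit, now measured against the
     64-bit limit; all other values are shifted into the 64-bit range.  */
  if (base_location >= 0x4f000000 && base_location <= 0x4fffffff)
    base_location += (((location_t) 0x50000000) << 31) - 0x50000000;
  else
    base_location = 1 + ((base_location - 1) << 31);
  return base_location;
}

int
main (void)
{
  /* The values used by location-overflow-test-pr116047.c and -pr120061.c.  */
  printf ("%#llx\n", (unsigned long long) remap_base_location (0x4ffe0180));
  printf ("%#llx\n", (unsigned long long) remap_base_location (0x61000000));
  return 0;
}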
+ +2025-05-07 Jan Hubicka <hubicka@ucw.cz> + + * gcc.target/i386/pr119919.c: Add -mtune=znver1 + +2025-05-07 Jennifer Schmitz <jschmitz@nvidia.com> + + PR target/117978 + * gcc.target/aarch64/sve/acle/general/whilelt_5.c: Adjust expected + outcome. + * gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c: New test. + * gcc.target/aarch64/sve/while_7.c: Adjust expected outcome. + * gcc.target/aarch64/sve/while_9.c: Adjust expected outcome. + +2025-05-07 Stefan Schulze Frielinghaus <stefansf@gcc.gnu.org> + + * gcc.target/s390/vector/cstoreti-1.c: New test. + * gcc.target/s390/vector/cstoreti-2.c: New test. + +2025-05-07 H.J. Lu <hjl.tools@gmail.com> + + PR target/120036 + * g++.target/i386/pr120036.C: New test. + * gcc.target/i386/pr117839-3a.c: Likewise. + * gcc.target/i386/pr117839-3b.c: Likewise. + +2025-05-07 Paul Thomas <pault@gcc.gnu.org> + and Steven G. Kargl <kargl@gcc.gnu.org> + + PR fortran/119948 + * gfortran.dg/pr119948.f90: Update to incorporate failing test, + where module procedure is the result. Test submodule cases. + +2025-05-07 Jeff Law <jlaw@ventanamicro.com> + + * g++.target/riscv/redundant-andi.C: New test. + 2025-05-06 Dongyan Chen <chendongyan@isrc.iscas.ac.cn> * gcc.target/riscv/arch-47.c: New test. diff --git a/gcc/testsuite/g++.target/i386/pr120036.C b/gcc/testsuite/g++.target/i386/pr120036.C new file mode 100644 index 0000000..a2fc24f --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr120036.C @@ -0,0 +1,113 @@ +/* { dg-do compile { target fpic } } */ +/* { dg-options "-O2 -std=c++11 -march=sapphirerapids -fPIC" } */ + +typedef _Float16 Native; +struct float16_t +{ + Native native; + float16_t (); + float16_t (Native arg) : native (arg) {} + operator Native (); + float16_t + operator+ (float16_t rhs) + { + return native + rhs.native; + } + float16_t + operator* (float16_t) + { + return native * native; + } +}; +template <int N> struct Simd +{ + static constexpr int kPrivateLanes = N; +}; +template <int N> struct ClampNAndPow2 +{ + using type = Simd<N>; +}; +template <int kLimit> struct CappedTagChecker +{ + static constexpr int N = sizeof (int) ? 
kLimit : 0; + using type = typename ClampNAndPow2<N>::type; +}; +template <typename, int kLimit, int> +using CappedTag = typename CappedTagChecker<kLimit>::type; +template <class D> +int +Lanes (D) +{ + return D::kPrivateLanes; +} +template <class D> int Zero (D); +template <class D> using VFromD = decltype (Zero (D ())); +struct Vec512 +{ + __attribute__ ((__vector_size__ (16))) _Float16 raw; +}; +Vec512 Zero (Simd<2>); +template <class D> void ReduceSum (D, VFromD<D>); +struct Dot +{ + template <int, class D, typename T> + static T + Compute (D d, T *pa, int num_elements) + { + T *pb; + int N = Lanes (d), i = 0; + if (__builtin_expect (num_elements < N, 0)) + { + T sum0 = 0, sum1 = 0; + for (; i + 2 <= num_elements; i += 2) + { + float16_t __trans_tmp_6 = pa[i] * pb[i], + __trans_tmp_5 = sum0 + __trans_tmp_6, + __trans_tmp_8 = pa[i + 1] * pb[1], + __trans_tmp_7 = sum1 + __trans_tmp_8; + sum0 = __trans_tmp_5; + sum1 = __trans_tmp_7; + } + float16_t __trans_tmp_9 = sum0 + sum1; + return __trans_tmp_9; + } + decltype (Zero (d)) sum0; + ReduceSum (d, sum0); + __builtin_trap (); + } +}; +template <int kMul, class Test, int kPow2> struct ForeachCappedR +{ + static void + Do (int min_lanes, int max_lanes) + { + CappedTag<int, kMul, kPow2> d; + Test () (int (), d); + ForeachCappedR<kMul / 2, Test, kPow2>::Do (min_lanes, max_lanes); + } +}; +template <class Test, int kPow2> struct ForeachCappedR<0, Test, kPow2> +{ + static void Do (int, int); +}; +struct TestDot +{ + template <class T, class D> + void + operator() (T, D d) + { + int counts[]{ 1, 3 }; + for (int num : counts) + { + float16_t a; + T __trans_tmp_4 = Dot::Compute<0> (d, &a, num); + } + } +}; +int DotTest_TestAllDot_TestTestBody_max_lanes; +void +DotTest_TestAllDot_TestTestBody () +{ + ForeachCappedR<64, TestDot, 0>::Do ( + 1, DotTest_TestAllDot_TestTestBody_max_lanes); +} diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-1.h b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-1.h new file mode 100644 index 0000000..3dd6434 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-1.h @@ -0,0 +1,6 @@ + + + + +#include "location-overflow-test-pr116047-2.h" +static_assert (__LINE__ == 6, ""); diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-2.h b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-2.h new file mode 100644 index 0000000..048f715 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-2.h @@ -0,0 +1 @@ +int i; diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047.c b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047.c new file mode 100644 index 0000000..75161fa --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047.c @@ -0,0 +1,5 @@ +/* PR preprocessor/116047 */ +/* { dg-do preprocess } */ +/* { dg-options "-nostdinc -std=c23 -fplugin-arg-location_overflow_plugin-value=0x4ffe0180" } */ +#include "location-overflow-test-pr116047-1.h" +/* { dg-final { scan-file location-overflow-test-pr116047.i "static_assert\[^\n\r]\*6\[^\n\r]\*== 6" } } */ diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-1.h b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-1.h new file mode 100644 index 0000000..ebf7704 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-1.h @@ -0,0 +1,6 @@ + + + + +#include "location-overflow-test-pr120061-2.h" + diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-2.h 
b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-2.h new file mode 100644 index 0000000..048f715 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-2.h @@ -0,0 +1 @@ +int i; diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061.c b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061.c new file mode 100644 index 0000000..e8e8038 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061.c @@ -0,0 +1,6 @@ +/* PR preprocessor/120061 */ +/* { dg-do preprocess } */ +/* { dg-options "-nostdinc -std=c23 -fplugin-arg-location_overflow_plugin-value=0x61000000" } */ +#include "location-overflow-test-pr120061-1.h" +static_assert (__LINE__ == 5, ""); +/* { dg-final { scan-file location-overflow-test-pr120061.i "static_assert\[^\n\r]\*5\[^\n\r]\*== 5" } } */ diff --git a/gcc/testsuite/gcc.dg/plugin/location_overflow_plugin.cc b/gcc/testsuite/gcc.dg/plugin/location_overflow_plugin.cc index f731b14..f770d35 100644 --- a/gcc/testsuite/gcc.dg/plugin/location_overflow_plugin.cc +++ b/gcc/testsuite/gcc.dg/plugin/location_overflow_plugin.cc @@ -85,9 +85,18 @@ plugin_init (struct plugin_name_args *plugin_info, error_at (UNKNOWN_LOCATION, "missing plugin argument"); /* With 64-bit locations, the thresholds are larger, so shift the base - location argument accordingly. */ + location argument accordingly, basically remap the GCC 14 32-bit + location_t argument values to 64-bit location_t counterparts. There + is one exception for values slightly before the 32-bit location_t + LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES (0x50000000). In that case + remap them to the same amount before the 64-bit location_t + LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - + ((location_t) 0x50000000) << 31. */ gcc_assert (sizeof (location_t) == sizeof (uint64_t)); - base_location = 1 + ((base_location - 1) << 31); + if (base_location >= 0x4f000000 && base_location <= 0x4fffffff) + base_location += (((location_t) 0x50000000) << 31) - 0x50000000; + else + base_location = 1 + ((base_location - 1) << 31); register_callback (plugin_info->base_name, PLUGIN_PRAGMAS, @@ -107,7 +116,7 @@ plugin_init (struct plugin_name_args *plugin_info, break; default: - error_at (UNKNOWN_LOCATION, "unrecognized value for plugin argument"); + break; } return 0; diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp index 90c9162..96e76d2 100644 --- a/gcc/testsuite/gcc.dg/plugin/plugin.exp +++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp @@ -138,7 +138,9 @@ set plugin_test_list [list \ { location_overflow_plugin.cc \ location-overflow-test-1.c \ location-overflow-test-2.c \ - location-overflow-test-pr83173.c } \ + location-overflow-test-pr83173.c \ + location-overflow-test-pr116047.c \ + location-overflow-test-pr120061.c } \ { must_tail_call_plugin.cc \ must-tail-call-1.c \ must-tail-call-2.c } \ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c new file mode 100644 index 0000000..4d8199c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c @@ -0,0 +1,66 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-additional-options "-funswitch-loops" } */ + +#include "tree-vect.h" + +typedef int type; +typedef type Vec2[2]; + +struct BytesVec { + type d[100]; +}; + +__attribute__((noipa)) struct BytesVec +buildVertexBufferData(const Vec2 *origVertices, bool needsZW, + unsigned paddingSize, unsigned long t) { + const unsigned vertexCount = t; + 
struct BytesVec data = (struct BytesVec){.d = {0}}; + type *nextVertexPtr = data.d; + + for (unsigned vertexIdx = 0u; vertexIdx < vertexCount; ++vertexIdx) { + + if (vertexIdx > t) + __builtin_trap(); + __builtin_memcpy(nextVertexPtr, &origVertices[vertexIdx], + 2 * sizeof(type)); + nextVertexPtr += 2; + + if (needsZW) { + nextVertexPtr += 2; + } + + nextVertexPtr += paddingSize; + } + + return data; +} +Vec2 origVertices[] = { + {0, 1}, {2, 3}, {4, 5}, {6, 7}, + {8, 9}, {10, 11}, {12, 13}, {14, 15}, + {16, 17}, {18, 19}, {20, 21}, {22, 23}, + {24, 25}, {26, 27}, {27, 28}, {29, 30}, +}; + +int main() +{ + check_vect (); + struct BytesVec vec + = buildVertexBufferData(origVertices, false, 0, + sizeof(origVertices) / sizeof(origVertices[0])); + + int errors = 0; + for (unsigned i = 0; i < 100; i++) { + if (i / 2 < sizeof(origVertices) / sizeof(origVertices[0])) { + int ii = i; + int e = origVertices[ii / 2][ii % 2]; + if (vec.d[i] != e) + errors++; + } else { + if (vec.d[i] != 0) + errors++; + } + } + if (errors) + __builtin_abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_135-pr120143.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_135-pr120143.c new file mode 100644 index 0000000..1ee30a8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_135-pr120143.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-additional-options "-O3 -fwhole-program" } */ + +short a; +extern _Bool b[][23]; +short g = 6; +int v[4]; +int x[3]; +void c(short g, int v[], int x[]) { + for (;;) + for (unsigned y = 0; y < 023; y++) { + b[y][y] = v[y]; + for (_Bool aa = 0; aa < (_Bool)g; aa = x[y]) + a = a > 0; + } +} +int main() { c(g, v, x); } diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c index f06a74a..05e266a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c @@ -11,8 +11,7 @@ extern "C" { /* ** load_vl1: -** ptrue (p[0-7])\.[bhsd], vl1 -** ld1h z0\.h, \1/z, \[x0\] +** ldr h0, \[x0\] ** ret */ svint16_t @@ -22,7 +21,12 @@ load_vl1 (int16_t *ptr) } /* -** load_vl2: +** load_vl2: { target aarch64_little_endian } +** ldr s0, \[x0\] +** ret +*/ +/* +** load_vl2: { target aarch64_big_endian } ** ptrue (p[0-7])\.h, vl2 ** ld1h z0\.h, \1/z, \[x0\] ** ret @@ -46,7 +50,12 @@ load_vl3 (int16_t *ptr) } /* -** load_vl4: +** load_vl4: { target aarch64_little_endian } +** ldr d0, \[x0\] +** ret +*/ +/* +** load_vl4: { target aarch64_big_endian } ** ptrue (p[0-7])\.h, vl4 ** ld1h z0\.h, \1/z, \[x0\] ** ret @@ -94,7 +103,12 @@ load_vl7 (int16_t *ptr) } /* -** load_vl8: +** load_vl8: { target aarch64_little_endian } +** ldr q0, \[x0\] +** ret +*/ +/* +** load_vl8: { target aarch64_big_endian } ** ptrue (p[0-7])\.h, vl8 ** ld1h z0\.h, \1/z, \[x0\] ** ret diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c b/gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c new file mode 100644 index 0000000..2d47c1f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c @@ -0,0 +1,81 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target aarch64_little_endian } */ + +#include <arm_sve.h> + +#define TEST(TYPE, TY, W, B) \ + sv##TYPE \ + ld1_##TY##W##B##_1 (TYPE *x) \ + { \ + svbool_t pg = svwhilelt_b##B (0, W); \ + return svld1_##TY##B (pg, x); \ + } \ + sv##TYPE \ + 
ld1_##TY##W##B##_2 (TYPE *x) \ + { \ + svbool_t pg = svptrue_pat_b##B ((enum svpattern) (W > 8 ? 9 : W)); \ + return svld1_##TY##B (pg, x); \ + } \ + void \ + st1_##TY##W##B##_1 (TYPE *x, sv##TYPE data) \ + { \ + svbool_t pg = svwhilelt_b##B (0, W); \ + return svst1_##TY##B (pg, x, data); \ + } \ + void \ + st1_##TY##W##B##_2 (TYPE *x, sv##TYPE data) \ + { \ + svbool_t pg = svptrue_pat_b##B ((enum svpattern) (W > 8 ? 9 : W)); \ + return svst1_##TY##B (pg, x, data); \ + } \ + +#define TEST64(TYPE, TY, B) \ + TEST (TYPE, TY, 1, B) \ + TEST (TYPE, TY, 2, B) \ + +#define TEST32(TYPE, TY, B) \ + TEST64 (TYPE, TY, B) \ + TEST (TYPE, TY, 4, B) \ + +#define TEST16(TYPE, TY, B) \ + TEST32 (TYPE, TY, B) \ + TEST (TYPE, TY, 8, B) \ + +#define TEST8(TYPE, TY, B) \ + TEST16 (TYPE, TY, B) \ + TEST (TYPE, TY, 16, B) + +#define T(TYPE, TY, B) \ + TEST##B (TYPE, TY, B) + +T (bfloat16_t, bf, 16) +T (float16_t, f, 16) +T (float32_t, f, 32) +T (float64_t, f, 64) +T (int8_t, s, 8) +T (int16_t, s, 16) +T (int32_t, s, 32) +T (int64_t, s, 64) +T (uint8_t, u, 8) +T (uint16_t, u, 16) +T (uint32_t, u, 32) +T (uint64_t, u, 64) + +/* { dg-final { scan-assembler-times {\tldr\tq0, \[x0\]} 24 } } */ +/* { dg-final { scan-assembler-times {\tldr\td0, \[x0\]} 24 } } */ +/* { dg-final { scan-assembler-times {\tldr\ts0, \[x0\]} 18 } } */ +/* { dg-final { scan-assembler-times {\tldr\th0, \[x0\]} 12 } } */ +/* { dg-final { scan-assembler-times {\tldr\tb0, \[x0\]} 4 } } */ + +/* { dg-final { scan-assembler-times {\tstr\tq0, \[x0\]} 24 } } */ +/* { dg-final { scan-assembler-times {\tstr\td0, \[x0\]} 24 } } */ +/* { dg-final { scan-assembler-times {\tstr\ts0, \[x0\]} 18 } } */ +/* { dg-final { scan-assembler-times {\tstr\th0, \[x0\]} 12 } } */ +/* { dg-final { scan-assembler-times {\tstr\tb0, \[x0\]} 4 } } */ + +svint8_t foo (int8_t *x) +{ + return svld1_s8 (svptrue_b16 (), x); +} +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, all\n\tld1b} 1 } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_7.c b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c index a66a20d..ab2fa36 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/while_7.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c @@ -19,7 +19,7 @@ TEST_ALL (ADD_LOOP) -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl8\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\td[0-9]+, \[x0\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\tq[0-9]+, \[x0\]} 1 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_9.c b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c index dd3f404..99940dd 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/while_9.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c @@ -19,7 +19,7 @@ TEST_ALL (ADD_LOOP) -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\tq[0-9]+\, \[x0\]} 1 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 2 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c index 52b8737..f3fea52 100644 --- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c +++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c @@ -106,8 +106,7 @@ TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t) /* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ /* For float16_t. */ -/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 2 } } */ -/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 4 } } */ +/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 6 } } */ /* For float16x4_t. 
*/ /* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+} 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr117839-3a.c b/gcc/testsuite/gcc.target/i386/pr117839-3a.c new file mode 100644 index 0000000..81afa9d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117839-3a.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */ +/* { dg-final { scan-assembler-times "xor\[a-z\]*\[\t \]*%xmm\[0-9\]\+,\[^,\]*" 1 } } */ + +typedef char v4qi __attribute__((vector_size(4))); +typedef char v16qi __attribute__((vector_size(16))); + +v4qi a; +v16qi b; +void +foo (v4qi* c, v16qi* d) +{ + v4qi sum = __extension__(v4qi){0, 0, 0, 0}; + v16qi sum2 = __extension__(v16qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0}; + for (int i = 0; i != 100; i++) + sum += c[i]; + for (int i = 0 ; i != 100; i++) + sum2 += d[i]; + a = sum; + b = sum2; +} diff --git a/gcc/testsuite/gcc.target/i386/pr117839-3b.c b/gcc/testsuite/gcc.target/i386/pr117839-3b.c new file mode 100644 index 0000000..a599c28 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117839-3b.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64-v3" } */ +/* { dg-final { scan-assembler-times "xor\[a-z\]*\[\t \]*%xmm\[0-9\]\+,\[^,\]*" 1 } } */ + +#include "pr117839-3a.c" diff --git a/gcc/testsuite/gcc.target/i386/pr119919.c b/gcc/testsuite/gcc.target/i386/pr119919.c index ed64656..e39819f 100644 --- a/gcc/testsuite/gcc.target/i386/pr119919.c +++ b/gcc/testsuite/gcc.target/i386/pr119919.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -msse2 -fdump-tree-vect-details" } */ +/* { dg-options "-O2 -msse2 -fdump-tree-vect-details -mtune=znver1" } */ int a[9*9]; bool b[9]; void test() diff --git a/gcc/testsuite/gcc.target/riscv/arch-48.c b/gcc/testsuite/gcc.target/riscv/arch-48.c new file mode 100644 index 0000000..58a558e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-48.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zama16b -mabi=lp64" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/pr120137.c b/gcc/testsuite/gcc.target/riscv/pr120137.c new file mode 100644 index 0000000..c55a1c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr120137.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl256b -mrvv-vector-bits=zvl -mabi=lp64" } */ + +char b[13][13]; +void c() { + for (int d = 0; d < 13; ++d) + for (int e = 0; e < 13; ++e) + b[d][e] = e == 0 ? 
-98 : 38; +} + + + diff --git a/gcc/testsuite/gcc.target/riscv/pr120154.c b/gcc/testsuite/gcc.target/riscv/pr120154.c new file mode 100644 index 0000000..fd849ca --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr120154.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gv -mabi=lp64" } */ + + + +typedef __attribute__((__vector_size__(4))) char V; + +V g; + +V +bar(V a, V b) +{ + V s = a + b + g; + return s; +} + +V +foo() +{ + return bar((V){20}, (V){23, 150}); +} + diff --git a/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c b/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c new file mode 100644 index 0000000..f2a131b --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c @@ -0,0 +1,127 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -march=z13" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** test_le: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** vecg \2,\1 +** jne \.L.+ +** vchlgs %v.,\1,\2 +** lghi %r2,0 +** locghinl %r2,1 +** br %r14 +*/ + +int test_le (__int128 x, __int128 y) { return x <= y; } + +/* +** test_leu: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** veclg \2,\1 +** jne \.L.+ +** vchlgs %v.,\1,\2 +** lghi %r2,0 +** locghinl %r2,1 +** br %r14 +*/ + +int test_leu (unsigned __int128 x, unsigned __int128 y) { return x <= y; } + +/* +** test_lt: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** vecg \1,\2 +** jne \.L.+ +** vchlgs %v.,\2,\1 +** lghi %r2,0 +** locghil %r2,1 +** br %r14 +*/ + +int test_lt (__int128 x, __int128 y) { return x < y; } + +/* +** test_ltu: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** veclg \1,\2 +** jne \.L.+ +** vchlgs %v.,\2,\1 +** lghi %r2,0 +** locghil %r2,1 +** br %r14 +*/ + +int test_ltu (unsigned __int128 x, unsigned __int128 y) { return x < y; } + +/* +** test_ge: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** vecg \1,\2 +** jne \.L.+ +** vchlgs %v.,\2,\1 +** lghi %r2,0 +** locghinl %r2,1 +** br %r14 +*/ + +int test_ge (__int128 x, __int128 y) { return x >= y; } + +/* +** test_geu: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** veclg \1,\2 +** jne \.L.+ +** vchlgs %v.,\2,\1 +** lghi %r2,0 +** locghinl %r2,1 +** br %r14 +*/ + +int test_geu (unsigned __int128 x, unsigned __int128 y) { return x >= y; } + +/* +** test_gt: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** vecg \2,\1 +** jne \.L.+ +** vchlgs %v.,\1,\2 +** lghi %r2,0 +** locghil %r2,1 +** br %r14 +*/ + +int test_gt (__int128 x, __int128 y) { return x > y; } + +/* +** test_gtu: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** veclg \2,\1 +** jne \.L.+ +** vchlgs %v.,\1,\2 +** lghi %r2,0 +** locghil %r2,1 +** br %r14 +*/ + +int test_gtu (unsigned __int128 x, unsigned __int128 y) { return x > y; } + +/* { dg-final { scan-assembler-times {vceqgs\t} 4 } } */ +/* { dg-final { scan-assembler-times {locghie\t} 2 } } */ +/* { dg-final { scan-assembler-times {locghine\t} 2 } } */ + +int test_eq (__int128 x, __int128 y) { return x == y; } + +int test_equ (unsigned __int128 x, unsigned __int128 y) { return x == y; } + +int test_ne (__int128 x, __int128 y) { return x != y; } + +int test_neu (unsigned __int128 x, unsigned __int128 y) { return x != y; } diff --git a/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c b/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c new file mode 100644 index 0000000..d7b0382 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c @@ -0,0 +1,25 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -march=z17" } */ +/* { 
dg-final { scan-assembler-times {vecq\t} 8 } } */ +/* { dg-final { scan-assembler-times {veclq\t} 4 } } */ +/* { dg-final { scan-assembler-times {locghile\t} 1 } } LE */ +/* { dg-final { scan-assembler-times {slbgr\t} 1 } } LEU */ +/* { dg-final { scan-assembler-times {locghil\t} 2 } } LT LTU */ +/* { dg-final { scan-assembler-times {locghihe\t} 2 } } GE GEU */ +/* { dg-final { scan-assembler-times {locghih\t} 1 } } GT */ +/* { dg-final { scan-assembler-times {alcgr\t} 1 } } GTU */ +/* { dg-final { scan-assembler-times {locghie\t} 2 } } EQ EQU */ +/* { dg-final { scan-assembler-times {locghine\t} 2 } } NE NEU */ + +int test_le (__int128 x, __int128 y) { return x <= y; } +int test_leu (unsigned __int128 x, unsigned __int128 y) { return x <= y; } +int test_lt (__int128 x, __int128 y) { return x < y; } +int test_ltu (unsigned __int128 x, unsigned __int128 y) { return x < y; } +int test_ge (__int128 x, __int128 y) { return x >= y; } +int test_geu (unsigned __int128 x, unsigned __int128 y) { return x >= y; } +int test_gt (__int128 x, __int128 y) { return x > y; } +int test_gtu (unsigned __int128 x, unsigned __int128 y) { return x > y; } +int test_eq (__int128 x, __int128 y) { return x == y; } +int test_equ (unsigned __int128 x, unsigned __int128 y) { return x == y; } +int test_ne (__int128 x, __int128 y) { return x != y; } +int test_neu (unsigned __int128 x, unsigned __int128 y) { return x != y; } diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index 5b63bf6..fe8aee0 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -1066,11 +1066,7 @@ if_convertible_gimple_assign_stmt_p (gimple *stmt, fprintf (dump_file, "tree could trap...\n"); return false; } - else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs)) - || POINTER_TYPE_P (TREE_TYPE (lhs))) - && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (stmt))) + else if (gimple_with_undefined_signed_overflow (stmt)) /* We have to rewrite stmts with undefined overflow. 
*/ need_to_rewrite_undefined = true; @@ -2830,7 +2826,6 @@ predicate_statements (loop_p loop) for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);) { gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi)); - tree lhs; if (!stmt) ; else if (is_false_predicate (cond) @@ -2886,12 +2881,7 @@ predicate_statements (loop_p loop) gsi_replace (&gsi, new_stmt, true); } - else if (((lhs = gimple_assign_lhs (stmt)), true) - && (INTEGRAL_TYPE_P (TREE_TYPE (lhs)) - || POINTER_TYPE_P (TREE_TYPE (lhs))) - && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (stmt))) + else if (gimple_with_undefined_signed_overflow (stmt)) rewrite_to_defined_overflow (&gsi); else if (gimple_vdef (stmt)) { @@ -2946,7 +2936,7 @@ predicate_statements (loop_p loop) gsi_replace (&gsi, new_call, true); } - lhs = gimple_get_lhs (gsi_stmt (gsi)); + tree lhs = gimple_get_lhs (gsi_stmt (gsi)); if (lhs && TREE_CODE (lhs) == SSA_NAME) ssa_names.add (lhs); gsi_next (&gsi); diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc index 4ca0875..9d64d3a 100644 --- a/gcc/tree-scalar-evolution.cc +++ b/gcc/tree-scalar-evolution.cc @@ -3932,10 +3932,7 @@ final_value_replacement_loop (class loop *loop) gsi2 = gsi_start (stmts); while (!gsi_end_p (gsi2)) { - gimple *stmt = gsi_stmt (gsi2); - if (is_gimple_assign (stmt) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (stmt))) + if (gimple_with_undefined_signed_overflow (gsi_stmt (gsi2))) rewrite_to_defined_overflow (&gsi2); gsi_next (&gsi2); } diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index f791994..19990d6 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -514,15 +514,9 @@ ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_) static inline void ifcombine_rewrite_to_defined_overflow (gimple_stmt_iterator gsi) { - gassign *ass = dyn_cast <gassign *> (gsi_stmt (gsi)); - if (!ass) + if (!gimple_with_undefined_signed_overflow (gsi_stmt (gsi))) return; - tree lhs = gimple_assign_lhs (ass); - if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs)) - || POINTER_TYPE_P (TREE_TYPE (lhs))) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (ass))) - rewrite_to_defined_overflow (&gsi); + rewrite_to_defined_overflow (&gsi); } diff --git a/gcc/tree-ssa-loop-im.cc b/gcc/tree-ssa-loop-im.cc index a3ca5af..ae2fd87 100644 --- a/gcc/tree-ssa-loop-im.cc +++ b/gcc/tree-ssa-loop-im.cc @@ -1241,12 +1241,24 @@ compute_invariantness (basic_block bb) lim_data->cost); } - if (lim_data->cost >= LIM_EXPENSIVE - /* When we run before PRE and PRE is active hoist all expressions - since PRE would do so anyway and we can preserve range info - but PRE cannot. */ - || (flag_tree_pre && !in_loop_pipeline)) + if (lim_data->cost >= LIM_EXPENSIVE) set_profitable_level (stmt); + /* When we run before PRE and PRE is active hoist all expressions + to the always executed loop since PRE would do so anyway + and we can preserve range info while PRE cannot. 
*/ + else if (flag_tree_pre && !in_loop_pipeline + && outermost) + { + class loop *mloop = lim_data->max_loop; + if (loop_depth (outermost) > loop_depth (mloop)) + { + mloop = outermost; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " constraining to loop depth %d\n\n\n", + loop_depth (mloop)); + } + set_level (stmt, bb->loop_father, mloop); + } } } @@ -1407,11 +1419,7 @@ move_computations_worker (basic_block bb) when the target loop header is executed and the stmt may invoke undefined integer or pointer overflow rewrite it to unsigned arithmetic. */ - if (is_gimple_assign (stmt) - && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt))) - && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt))) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (stmt)) + if (gimple_with_undefined_signed_overflow (stmt) && (!ALWAYS_EXECUTED_IN (bb) || !(ALWAYS_EXECUTED_IN (bb) == level || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))) diff --git a/gcc/tree-ssa-loop-split.cc b/gcc/tree-ssa-loop-split.cc index 5f78c0b..80f488a 100644 --- a/gcc/tree-ssa-loop-split.cc +++ b/gcc/tree-ssa-loop-split.cc @@ -663,10 +663,7 @@ split_loop (class loop *loop1) gsi = gsi_start (stmts2); while (!gsi_end_p (gsi)) { - gimple *stmt = gsi_stmt (gsi); - if (is_gimple_assign (stmt) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (stmt))) + if (gimple_with_undefined_signed_overflow (gsi_stmt (gsi))) rewrite_to_defined_overflow (&gsi); gsi_next (&gsi); } diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc index 4017eea..13bb85c 100644 --- a/gcc/tree-ssa-reassoc.cc +++ b/gcc/tree-ssa-reassoc.cc @@ -2925,30 +2925,22 @@ update_range_test (struct range_entry *range, struct range_entry *otherrange, !gsi_end_p (gsi); gsi_next (&gsi)) { gimple *stmt = gsi_stmt (gsi); - if (is_gimple_assign (stmt)) - if (tree lhs = gimple_assign_lhs (stmt)) - if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs)) - || POINTER_TYPE_P (TREE_TYPE (lhs))) - && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))) - { - enum tree_code code = gimple_assign_rhs_code (stmt); - if (arith_code_with_undefined_signed_overflow (code)) - { - gimple_stmt_iterator gsip = gsi; - gimple_stmt_iterator gsin = gsi; - gsi_prev (&gsip); - gsi_next (&gsin); - rewrite_to_defined_overflow (&gsi); - unsigned uid = gimple_uid (stmt); - if (gsi_end_p (gsip)) - gsip = gsi_after_labels (bb); - else - gsi_next (&gsip); - for (; gsi_stmt (gsip) != gsi_stmt (gsin); - gsi_next (&gsip)) - gimple_set_uid (gsi_stmt (gsip), uid); - } - } + if (gimple_with_undefined_signed_overflow (stmt)) + { + gimple_stmt_iterator gsip = gsi; + gimple_stmt_iterator gsin = gsi; + gsi_prev (&gsip); + gsi_next (&gsin); + rewrite_to_defined_overflow (&gsi); + unsigned uid = gimple_uid (stmt); + if (gsi_end_p (gsip)) + gsip = gsi_after_labels (bb); + else + gsi_next (&gsip); + for (; gsi_stmt (gsip) != gsi_stmt (gsin); + gsi_next (&gsip)) + gimple_set_uid (gsi_stmt (gsip), uid); + } } if (opcode == BIT_IOR_EXPR diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 231a3ca..9fd1ef2 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -734,7 +734,6 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) stmt_vec_info stmt_vinfo = vect_stmt_to_vectorize (loop_vinfo->lookup_stmt (stmt)); - stmt = STMT_VINFO_STMT (stmt_vinfo); auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo); if (!dr_ref) continue; diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 562e222..80e9c01 100644 --- 
a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -5042,14 +5042,17 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size, vec<stmt_vec_info> roots = vNULL; vec<tree> remain = vNULL; gphi *phi = as_a<gphi *> (STMT_VINFO_STMT (stmt_info)); - stmts.create (1); tree def = gimple_phi_arg_def_from_edge (phi, latch_e); stmt_vec_info lc_info = loop_vinfo->lookup_def (def); - stmts.quick_push (vect_stmt_to_vectorize (lc_info)); - vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group, - stmts, roots, remain, - max_tree_size, &limit, - bst_map, NULL, force_single_lane); + if (lc_info) + { + stmts.create (1); + stmts.quick_push (vect_stmt_to_vectorize (lc_info)); + vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group, + stmts, roots, remain, + max_tree_size, &limit, + bst_map, NULL, force_single_lane); + } /* When the latch def is from a different cycle this can only be a induction. Build a simple instance for this. ??? We should be able to start discovery from the PHI @@ -5059,8 +5062,6 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size, tem.quick_push (stmt_info); if (!bst_map->get (tem)) { - gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) - == vect_induction_def); stmts.create (1); stmts.quick_push (stmt_info); vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group, diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index ea0b426..a8762ba 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -419,18 +419,21 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, } } - /* Check if it's an induction and multiple exits. In this case there will be - a usage later on after peeling which is needed for the alternate exit. */ + /* Check if it's a not live PHI and multiple exits. In this case + there will be a usage later on after peeling which is needed for the + alternate exit. */ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) - && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def) + && is_a <gphi *> (stmt) + && ((! VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)) + && ! *live_p) + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)) { if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "vec_stmt_relevant_p: induction forced for " - "early break.\n"); + dump_printf_loc (MSG_NOTE, vect_location, + "vec_stmt_relevant_p: PHI forced live for " + "early break.\n"); LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo).safe_push (stmt_info); *live_p = true; - } if (*live_p && *relevant == vect_unused_in_scope @@ -714,6 +717,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal) bb = bbs[i]; for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) { + if (virtual_operand_p (gimple_phi_result (gsi_stmt (si)))) + continue; stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G", @@ -8786,6 +8791,15 @@ vectorizable_store (vec_info *vinfo, if (n == const_nunits) { int mis_align = dr_misalignment (first_dr_info, vectype); + /* With VF > 1 we advance the DR by step, if that is constant + and only aligned when performed VF times, DR alignment + analysis can analyze this as aligned since it assumes + contiguous accesses. But that is not how we code generate + here, so adjust for this. 
*/ + if (maybe_gt (vf, 1u) + && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr), + DR_TARGET_ALIGNMENT (first_dr_info))) + mis_align = -1; dr_alignment_support dr_align = vect_supportable_dr_alignment (vinfo, dr_info, vectype, mis_align); @@ -8807,6 +8821,10 @@ vectorizable_store (vec_info *vinfo, ltype = build_vector_type (elem_type, n); lvectype = vectype; int mis_align = dr_misalignment (first_dr_info, ltype); + if (maybe_gt (vf, 1u) + && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr), + DR_TARGET_ALIGNMENT (first_dr_info))) + mis_align = -1; dr_alignment_support dr_align = vect_supportable_dr_alignment (vinfo, dr_info, ltype, mis_align); @@ -8867,17 +8885,10 @@ vectorizable_store (vec_info *vinfo, } } unsigned align; - /* ??? We'd want to use - if (alignment_support_scheme == dr_aligned) - align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); - since doing that is what we assume we can in the above checks. - But this interferes with groups with gaps where for example - VF == 2 makes the group in the unrolled loop aligned but the - fact that we advance with step between the two subgroups - makes the access to the second unaligned. See PR119586. - We have to anticipate that here or adjust code generation to - avoid the misaligned loads by means of permutations. */ - align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info)); + if (alignment_support_scheme == dr_aligned) + align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); + else + align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info)); /* Alignment is at most the access size if we do multiple stores. */ if (nstores > 1) align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align); @@ -10805,6 +10816,15 @@ vectorizable_load (vec_info *vinfo, if (n == const_nunits) { int mis_align = dr_misalignment (first_dr_info, vectype); + /* With VF > 1 we advance the DR by step, if that is constant + and only aligned when performed VF times, DR alignment + analysis can analyze this as aligned since it assumes + contiguous accesses. But that is not how we code generate + here, so adjust for this. */ + if (maybe_gt (vf, 1u) + && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr), + DR_TARGET_ALIGNMENT (first_dr_info))) + mis_align = -1; dr_alignment_support dr_align = vect_supportable_dr_alignment (vinfo, dr_info, vectype, mis_align); @@ -10833,6 +10853,10 @@ vectorizable_load (vec_info *vinfo, if (VECTOR_TYPE_P (ptype)) { mis_align = dr_misalignment (first_dr_info, ptype); + if (maybe_gt (vf, 1u) + && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr), + DR_TARGET_ALIGNMENT (first_dr_info))) + mis_align = -1; dr_align = vect_supportable_dr_alignment (vinfo, dr_info, ptype, mis_align); @@ -10852,8 +10876,10 @@ vectorizable_load (vec_info *vinfo, } } unsigned align; - /* ??? The above is still wrong, see vectorizable_store. */ - align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info)); + if (alignment_support_scheme == dr_aligned) + align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); + else + align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info)); /* Alignment is at most the access size if we do multiple loads. */ if (nloads > 1) align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align); |
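The hunks above in gcc/tree-if-conv.cc, gcc/tree-scalar-evolution.cc, gcc/tree-ssa-ifcombine.cc, gcc/tree-ssa-loop-im.cc, gcc/tree-ssa-loop-split.cc and gcc/tree-ssa-reassoc.cc all collapse the same open-coded check into a call to the new gimple_with_undefined_signed_overflow predicate. As a reading aid, here is a minimal sketch of what that predicate has to decide, reconstructed only from the conditions the hunks remove; the "_sketch" suffix is ours, and the real definition may differ in detail.

/* Sketch only, not the actual implementation: return true if STMT is an
   assignment whose result has integral or pointer type with undefined
   overflow and whose rhs code can overflow -- the condition the callers
   above used to spell out by hand.  */
static bool
gimple_with_undefined_signed_overflow_sketch (gimple *stmt)
{
  gassign *ass = dyn_cast <gassign *> (stmt);
  if (!ass)
    return false;
  tree type = TREE_TYPE (gimple_assign_lhs (ass));
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    return false;
  if (!TYPE_OVERFLOW_UNDEFINED (type))
    return false;
  return arith_code_with_undefined_signed_overflow (gimple_assign_rhs_code (ass));
}

Each caller then follows the pattern visible in the hunks: when the predicate holds, the statement is handed to rewrite_to_defined_overflow (&gsi) so the arithmetic is redone in an unsigned type before it is hoisted, predicated, or otherwise moved.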