55 files changed, 1183 insertions, 218 deletions
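Editorial aside, not part of the commit: several cost-model hunks below (gcc/config/i386/i386.cc and gcc/config/rs6000/rs6000.cc) scale the vec_construct cost for loads whose data-reference step is not a compile-time constant (VMAT_ELEMENTWISE, single-lane VMAT_STRIDED_SLP, or VMAT_GATHER_SCATTER). A minimal, hypothetical C loop of that shape, for orientation only (function name invented here):

/* The stride is a runtime value, so DR_STEP is not an INTEGER_CST and the
   vectorizer has to build each vector from individual scalar loads; that
   per-element construction is what the scaled vec_construct cost models.  */
void
load_with_runtime_stride (float *restrict dst, const float *restrict src,
                          long stride, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = src[i * stride];
}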
diff --git a/contrib/gcc-changelog/git_commit.py b/contrib/gcc-changelog/git_commit.py index 80a3276..e0c46be 100755 --- a/contrib/gcc-changelog/git_commit.py +++ b/contrib/gcc-changelog/git_commit.py @@ -97,6 +97,7 @@ bug_components = { 'd', 'debug', 'demangler', + 'diagnostics', 'driver', 'fastjar', 'fortran', diff --git a/gcc/ada/gcc-interface/trans.cc b/gcc/ada/gcc-interface/trans.cc index ef5ec75..fd1d39c 100644 --- a/gcc/ada/gcc-interface/trans.cc +++ b/gcc/ada/gcc-interface/trans.cc @@ -8753,7 +8753,7 @@ gnat_to_gnu (Node_Id gnat_node) /* Set the location information on the result if it's not a simple name or something that contains a simple name, for example a tag, because - we don"t want all the references to get the location of the first use. + we don't want all the references to get the location of the first use. Note that we may have no result if we tried to build a CALL_EXPR node to a procedure with no side-effects and optimization is enabled. */ else if (kind != N_Identifier diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 8040409..6f11cc0 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -224,7 +224,7 @@ AARCH64_CORE("neoverse-v3ae", neoversev3ae, cortexa57, V9_2A, (SVE2_BITPERM, RNG AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) /* NVIDIA ('N') cores. */ -AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1) +AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), olympus, 0x4e, 0x10, -1) /* Armv9-A big.LITTLE processors. */ AARCH64_CORE("gb10", gb10, cortexa57, V9_2A, (SVE2_BITPERM, SVE2_AES, SVE2_SHA3, SVE2_SM4, MEMTAG, PROFILE), cortexx925, 0x41, AARCH64_BIG_LITTLE (0xd85, 0xd87), -1) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 4d9d83d..cb1699a 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -430,6 +430,7 @@ static const struct aarch64_flag_desc aarch64_tuning_flags[] = #include "tuning_models/neoversev2.h" #include "tuning_models/neoversev3.h" #include "tuning_models/neoversev3ae.h" +#include "tuning_models/olympus.h" #include "tuning_models/a64fx.h" #include "tuning_models/fujitsu_monaka.h" @@ -17720,7 +17721,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind, { if (gimple_vuse (SSA_NAME_DEF_STMT (offset))) { - if (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type) + if (SLP_TREE_TYPE (node) == load_vec_info_type) ops->loads += count - 1; else /* Stores want to count both the index to array and data to diff --git a/gcc/config/aarch64/tuning_models/olympus.h b/gcc/config/aarch64/tuning_models/olympus.h new file mode 100644 index 0000000..268789d --- /dev/null +++ b/gcc/config/aarch64/tuning_models/olympus.h @@ -0,0 +1,210 @@ +/* Tuning model description for the NVIDIA Olympus core. + Copyright The GNU Toolchain Authors. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef GCC_AARCH64_H_OLYMPUS +#define GCC_AARCH64_H_OLYMPUS + +#include "generic.h" + +static struct cpu_regmove_cost olympus_regmove_cost = +{ + 1, /* GP2GP */ + /* Spilling to int<->fp instead of memory is recommended so set + realistic costs compared to memmov_cost. */ + 3, /* GP2FP */ + 3, /* FP2GP */ + 2 /* FP2FP */ +}; + +static advsimd_vec_cost olympus_advsimd_vector_cost = +{ + 2, /* int_stmt_cost */ + 2, /* fp_stmt_cost */ + 2, /* ld2_st2_permute_cost */ + 2, /* ld3_st3_permute_cost */ + 3, /* ld4_st4_permute_cost */ + 2, /* permute_cost */ + 5, /* reduc_i8_cost */ + 3, /* reduc_i16_cost */ + 3, /* reduc_i32_cost */ + 2, /* reduc_i64_cost */ + 4, /* reduc_f16_cost */ + 4, /* reduc_f32_cost */ + 4, /* reduc_f64_cost */ + 2, /* store_elt_extra_cost */ + 8, /* vec_to_scalar_cost */ + 4, /* scalar_to_vec_cost */ + 6, /* align_load_cost */ + 6, /* unalign_load_cost */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ +}; + +static sve_vec_cost olympus_sve_vector_cost = +{ + { + 2, /* int_stmt_cost */ + 2, /* fp_stmt_cost */ + 2, /* ld2_st2_permute_cost */ + 3, /* ld3_st3_permute_cost */ + 3, /* ld4_st4_permute_cost */ + 2, /* permute_cost */ + 9, /* reduc_i8_cost */ + 8, /* reduc_i16_cost */ + 6, /* reduc_i32_cost */ + 2, /* reduc_i64_cost */ + 8, /* reduc_f16_cost */ + 6, /* reduc_f32_cost */ + 4, /* reduc_f64_cost */ + 2, /* store_elt_extra_cost */ + 8, /* vec_to_scalar_cost */ + 4, /* scalar_to_vec_cost */ + 4, /* align_load_cost */ + 6, /* unalign_load_cost */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ + }, + 3, /* clast_cost */ + 10, /* fadda_f16_cost */ + 6, /* fadda_f32_cost */ + 4, /* fadda_f64_cost */ + 14, /* gather_load_x32_cost */ + 12, /* gather_load_x64_cost */ + 42, /* gather_load_x32_init_cost */ + 24, /* gather_load_x64_init_cost */ + 1 /* scatter_store_elt_cost */ +}; + +static aarch64_scalar_vec_issue_info olympus_scalar_issue_info = +{ + 4, /* loads_stores_per_cycle */ + 2, /* stores_per_cycle */ + 8, /* general_ops_per_cycle */ + 0, /* fp_simd_load_general_ops */ + 1 /* fp_simd_store_general_ops */ +}; + +static aarch64_advsimd_vec_issue_info olympus_advsimd_issue_info = +{ + { + 3, /* loads_stores_per_cycle */ + 2, /* stores_per_cycle */ + 6, /* general_ops_per_cycle */ + 0, /* fp_simd_load_general_ops */ + 1 /* fp_simd_store_general_ops */ + }, + 2, /* ld2_st2_general_ops */ + 2, /* ld3_st3_general_ops */ + 3 /* ld4_st4_general_ops */ +}; + +static aarch64_sve_vec_issue_info olympus_sve_issue_info = +{ + { + { + 3, /* loads_stores_per_cycle */ + 2, /* stores_per_cycle */ + 6, /* general_ops_per_cycle */ + 0, /* fp_simd_load_general_ops */ + 1 /* fp_simd_store_general_ops */ + }, + 2, /* ld2_st2_general_ops */ + 2, /* ld3_st3_general_ops */ + 3 /* ld4_st4_general_ops */ + }, + 2, /* pred_ops_per_cycle */ + 1, /* while_pred_ops */ + 0, /* int_cmp_pred_ops */ + 0, /* fp_cmp_pred_ops */ + 1, /* gather_scatter_pair_general_ops */ + 1 /* gather_scatter_pair_pred_ops */ +}; + +static aarch64_vec_issue_info olympus_vec_issue_info = +{ + &olympus_scalar_issue_info, + &olympus_advsimd_issue_info, + &olympus_sve_issue_info +}; + +/* Olympus costs for vector insn classes. 
*/ +static struct cpu_vector_cost olympus_vector_cost = +{ + 1, /* scalar_int_stmt_cost */ + 2, /* scalar_fp_stmt_cost */ + 4, /* scalar_load_cost */ + 1, /* scalar_store_cost */ + 1, /* cond_taken_branch_cost */ + 1, /* cond_not_taken_branch_cost */ + &olympus_advsimd_vector_cost, /* advsimd */ + &olympus_sve_vector_cost, /* sve */ + &olympus_vec_issue_info /* issue_info */ +}; + +/* Olympus prefetch settings (which disable prefetch). */ +static cpu_prefetch_tune olympus_prefetch_tune = +{ + 0, /* num_slots */ + -1, /* l1_cache_size */ + 64, /* l1_cache_line_size */ + -1, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ + -1, /* minimum_stride */ + -1 /* default_opt_level */ +}; + +static struct tune_params olympus_tunings = +{ + &cortexa76_extra_costs, + &generic_armv9_a_addrcost_table, + &olympus_regmove_cost, + &olympus_vector_cost, + &generic_branch_cost, + &generic_approx_modes, + SVE_128, /* sve_width */ + { 4, /* load_int. */ + 1, /* store_int. */ + 6, /* load_fp. */ + 3, /* store_fp. */ + 5, /* load_pred. */ + 1 /* store_pred. */ + }, /* memmov_cost. */ + 10, /* issue_rate */ + AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */ + "32:16", /* function_align. */ + "4", /* jump_align. */ + "32:16", /* loop_align. */ + 8, /* int_reassoc_width. */ + 6, /* fp_reassoc_width. */ + 4, /* fma_reassoc_width. */ + 6, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ + 2, /* min_div_recip_mul_df. */ + 0, /* max_case_values. */ + tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ + (AARCH64_EXTRA_TUNE_BASE + | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS + | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT + | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */ + &olympus_prefetch_tune, + AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ + AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */ +}; + +#endif /* GCC_AARCH64_H_OLYMPUS. */ diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h index fe68678..0287400 100644 --- a/gcc/config/gcn/gcn-opts.h +++ b/gcc/config/gcn/gcn-opts.h @@ -92,6 +92,8 @@ enum hsaco_attr_type /* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0) flag for non-scalar memory operations. The string starts on purpose with a space. Note: for scalar memory operations (i.e. 's_...'), 'glc' is still used. + Note: on atomics, glc/sc0 denotes whether the pre-op operation should + be used. CDNA3 also uses 'nt' instead of 'slc' and 'sc1' instead of 'scc'; however, there is no non-scalar user so far. */ #define TARGET_GLC_NAME (TARGET_CDNA3 ? 
" sc0" : " glc") diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 0994329..a34d2e3 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -3938,6 +3938,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,*,yes,yes")]) @@ -3992,6 +3993,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,*,yes,yes")]) @@ -4050,6 +4052,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,yes,yes")]) @@ -4073,6 +4076,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,yes,yes")]) diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 8959118..557568c 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -5792,42 +5792,6 @@ gcn_libc_has_function (enum function_class fn_class, /* }}} */ /* {{{ md_reorg pass. */ -/* Identify V_CMPX from the "type" attribute; - note: this will also match 'v_cmp %E1 vcc'. */ - -static bool -gcn_cmpx_insn_p (attr_type type) -{ - switch (type) - { - case TYPE_VOPC: - return true; - case TYPE_MUBUF: - case TYPE_MTBUF: - case TYPE_FLAT: - case TYPE_VOP3P_MAI: - case TYPE_UNKNOWN: - case TYPE_SOP1: - case TYPE_SOP2: - case TYPE_SOPK: - case TYPE_SOPC: - case TYPE_SOPP: - case TYPE_SMEM: - case TYPE_DS: - case TYPE_VOP2: - case TYPE_VOP1: - case TYPE_VOP3A: - case TYPE_VOP3B: - case TYPE_VOP_SDWA: - case TYPE_VOP_DPP: - case TYPE_MULT: - case TYPE_VMULT: - return false; - } - gcc_unreachable (); - return false; -} - /* Identify VMEM instructions from their "type" attribute. */ static bool @@ -6356,19 +6320,59 @@ gcn_md_reorg (void) reg_class_contents[(int)VCC_CONDITIONAL_REG]))) nops_rqd = ivccwait - prev_insn->age; + /* NOTE: The following condition for adding wait state exists, but + GCC does not access the special registers using their SGPR#. + Thus, no action is required here. The following wait-state + condition exists at least for VEGA/gfx900+ to CDNA3: + Mixed use of VCC: alias vs. SGPR# - v_readlane, + v_readfirstlane, v_cmp, v_add_*i/u, v_sub_*i/u, v_div_*scale + followed by VALU reads VCC as constant requires 1 wait state. + (As carry-in, it requires none.) + [VCC can be accessed by name or logical SGPR that holds it.] */ + + /* Testing indicates that CDNA3 requires an s_nop between + e.g. 'v_cmp_eq_u64 vcc, v[4:5], v[8:9]' and 'v_mov_b32 v0, vcc_lo'. + Thus: add it between v_cmp writing VCC and VALU read of VCC. 
*/ + if (TARGET_CDNA3_NOPS + && (prev_insn->age + nops_rqd) < 1 + && iunit == UNIT_VECTOR + && (hard_reg_set_intersect_p + (depregs, reg_class_contents[(int)VCC_CONDITIONAL_REG])) + && get_attr_vcmp (prev_insn->insn) == VCMP_VCMP) + nops_rqd = 1 - prev_insn->age; + + /* CDNA3: VALU writes SGPR/VCC: v_readlane, v_readfirstlane, v_cmp, + v_add_*i/u, v_sub_*i/u, v_div_*scale - followed by: + - VALU reads SGPR as constant requires 1 waite state + - VALU reads SGPR as carry-in requires no waite state + - v_readlane/v_writelane reads SGPR as lane select requires 4 wait + states. */ + if (TARGET_CDNA3_NOPS + && (prev_insn->age + nops_rqd) < 4 + && iunit == UNIT_VECTOR + && prev_insn->unit == UNIT_VECTOR + && hard_reg_set_intersect_p + (depregs, reg_class_contents[(int) SGPR_SRC_REGS])) + { + if (get_attr_laneselect (insn) != LANESELECT_NO) + nops_rqd = 4 - prev_insn->age; + else if ((prev_insn->age + nops_rqd) < 1) + nops_rqd = 1 - prev_insn->age; + } + /* CDNA3: v_cmpx followed by - V_readlane, v_readfirstlane, v_writelane requires 4 wait states - VALU reads EXEC as constant requires 2 wait states - other VALU requires no wait state */ if (TARGET_CDNA3_NOPS && (prev_insn->age + nops_rqd) < 4 - && gcn_cmpx_insn_p (prev_insn->type) + && get_attr_vcmp (prev_insn->insn) == VCMP_VCMPX && get_attr_laneselect (insn) != LANESELECT_NO) nops_rqd = 4 - prev_insn->age; else if (TARGET_CDNA3_NOPS && (prev_insn->age + nops_rqd) < 2 && iunit == UNIT_VECTOR - && gcn_cmpx_insn_p (prev_insn->type) + && get_attr_vcmp (prev_insn->insn) == VCMP_VCMPX && TEST_HARD_REG_BIT (ireads, EXECZ_REG)) nops_rqd = 2 - prev_insn->age; @@ -6436,8 +6440,8 @@ gcn_md_reorg (void) } /* Insert the required number of NOPs. */ - for (int i = nops_rqd; i > 0; i--) - emit_insn_after (gen_nop (), last_insn); + if (nops_rqd > 0) + emit_insn_after (gen_nops (GEN_INT (nops_rqd-1)), last_insn); /* Age the previous instructions. We can also ignore writes to registers subsequently overwritten. */ @@ -7283,6 +7287,11 @@ print_operand_address (FILE *file, rtx mem) H - print second part of a multi-reg value (high-part of 2-reg value) J - print third part of a multi-reg value K - print fourth part of a multi-reg value + R Print a scalar register number as an integer. Temporary hack. + V - Print a vector register number as an integer. Temporary hack. + + Additionally, the standard builtin c, n, a, and l exist; see gccint's + "Output Templates and Operand Substitution" for details. */ void diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index fad42e6..4130cf6 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -324,6 +324,11 @@ "store,storex34,load,atomic,atomicwait,cmpswapx2,no" (const_string "no")) +; Identify v_cmp and v_cmpx instructions for "Manually Inserted Wait State" +; handling. + +(define_attr "vcmp" "vcmp,vcmpx,no" (const_string "no")) + ; Identify instructions that require "Manually Inserted Wait State" if ; a previous instruction writes to VCC. The number gives the number of NOPs. @@ -424,6 +429,15 @@ "s_nop\t0x0" [(set_attr "type" "sopp")]) +; Variant of 'nop' that accepts a count argument. +; s_nop accepts 0x0 to 0xf for 1 to 16 nops; however, +; as %0 prints decimals, only 0 to 9 (= 1 to 10 nops) can be used. +(define_insn "nops" + [(match_operand 0 "const_int_operand")] + "" + "s_nop\t0x%0" + [(set_attr "type" "sopp")]) + ; FIXME: What should the value of the immediate be? Zero is disallowed, so ; pick 1 for now. 
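Editorial aside on the gcn.md 'nops' pattern added above, not part of the commit: a self-contained sketch (helper name invented) of the operand arithmetic now used by gcn_md_reorg, which emits one s_nop with immediate nops_rqd-1 instead of nops_rqd separate "s_nop 0x0" instructions; the immediate encodes "count minus one" and, because %0 prints decimal digits, only counts 1 to 10 are representable this way.

#include <assert.h>
#include <stdio.h>

/* Print the single s_nop that replaces NOPS_RQD back-to-back nops.  */
static void
emit_wait_states (int nops_rqd)
{
  if (nops_rqd <= 0)
    return;
  assert (nops_rqd <= 10);	/* %0 prints 0..9, i.e. 1 to 10 nops.  */
  printf ("\ts_nop\t0x%d\n", nops_rqd - 1);
}

int
main (void)
{
  emit_wait_states (1);		/* s_nop 0x0 */
  emit_wait_states (4);		/* s_nop 0x3 */
  return 0;
}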
(define_insn "trap" @@ -566,6 +580,7 @@ [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat, flat,flat,flat,flat") (set_attr "flatmemaccess" "*,*,*,*,*,*,*,*,*,load,load,store,load,load,store") + (set_attr "vcmp" "*,*,*,*,vcmp,*,*,*,*,*,*,*,*,*,*") (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*") (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12") (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*") @@ -1089,6 +1104,7 @@ s_cmp%D1\t%2, %3 v_cmp%E1\tvcc, %2, %3" [(set_attr "type" "sopc,vopc") + (set_attr "vcmp" "vcmp") (set_attr "length" "8")]) (define_insn "cstoredi4_vector" @@ -1099,6 +1115,7 @@ "" "v_cmp%E1\tvcc, %2, %3" [(set_attr "type" "vopc") + (set_attr "vcmp" "vcmp") (set_attr "length" "8")]) (define_expand "cbranchdi4" @@ -1125,6 +1142,7 @@ "" "v_cmp%E1\tvcc, %2, %3" [(set_attr "type" "vopc") + (set_attr "vcmp" "vcmp") (set_attr "length" "8")]) (define_expand "cbranch<mode>4" @@ -2165,7 +2183,7 @@ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 %G1\;" "s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;flat_load%o0\t%0, %A1%O1 %G1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_load%o0\t%0, %A1%O1 %G1\;" "s_waitcnt\t0\;buffer_inv sc1" : "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 %G1\;" "s_waitcnt\t0\;buffer_wbinvl1_vol"); @@ -2177,7 +2195,7 @@ ? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;global_load%o0\t%0, %A1%O1 %G1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;global_load%o0\t%0, %A1%O1 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_inv sc1" : "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"); @@ -2224,7 +2242,7 @@ : TARGET_WBINVL1_CACHE ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 %G1" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;flat_store%o1\t%A0, %1%O0 %G1" + ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_store%o1\t%A0, %1%O0 %G1" : "error: cache architectire unspecified"); case 2: return (TARGET_GLn_CACHE @@ -2232,7 +2250,7 @@ : TARGET_WBINVL1_CACHE ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 %G1" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;global_store%o1\t%A0, %1%O0 %G1" + ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;global_store%o1\t%A0, %1%O0 %G1" : "error: cache architecture unspecified"); } break; @@ -2252,7 +2270,8 @@ ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 %G1\;" "s_waitcnt\t0\;buffer_wbinvl1_vol" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;flat_store%o1\t%A0, %1%O0 %G1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;" + "flat_store%o1\t%A0, %1%O0 %G1\;" "s_waitcnt\t0\;buffer_inv sc1" : "error: cache architecture unspecified"); case 2: @@ -2263,7 +2282,8 @@ ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;global_store%o1\t%A0, %1%O0 %G1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;" + "global_store%o1\t%A0, %1%O0 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_inv sc1" : "error: cache architecture unspecified"); } @@ -2347,7 +2367,7 @@ ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;" "s_waitcnt\t0" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;" + ? 
"buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;" "s_waitcnt\t0" : "error: cache architecture unspecified"); case 2: @@ -2360,7 +2380,7 @@ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;" "s_waitcnt\tvmcnt(0)" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;" "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;" "s_waitcnt\tvmcnt(0)" : "error: cache architecture unspecified"); @@ -2382,7 +2402,7 @@ ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;" "s_waitcnt\t0\;buffer_wbinvl1_vol" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;" "s_waitcnt\t0\;buffer_inv sc1" : "error: cache architecture unspecified"); case 2: @@ -2395,7 +2415,7 @@ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;" "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_inv sc1" : "error: cache architecture unspecified"); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 4682db85..0f0acae 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -12442,6 +12442,28 @@ static GTY(()) rtx ix86_tls_symbol; static rtx ix86_tls_get_addr (void) { + if (cfun->machine->call_saved_registers + == TYPE_NO_CALLER_SAVED_REGISTERS) + { + /* __tls_get_addr doesn't preserve vector registers. When a + function with no_caller_saved_registers attribute calls + __tls_get_addr, YMM and ZMM registers will be clobbered. + Issue an error and suggest -mtls-dialect=gnu2 in this case. */ + if (cfun->machine->func_type == TYPE_NORMAL) + error (G_("%<-mtls-dialect=gnu2%> must be used with a function" + " with the %<no_caller_saved_registers%> attribute")); + else + error (cfun->machine->func_type == TYPE_EXCEPTION + ? G_("%<-mtls-dialect=gnu2%> must be used with an" + " exception service routine") + : G_("%<-mtls-dialect=gnu2%> must be used with an" + " interrupt service routine")); + /* Don't issue the same error twice. */ + cfun->machine->func_type = TYPE_NORMAL; + cfun->machine->call_saved_registers + = TYPE_DEFAULT_CALL_SAVED_REGISTERS; + } + if (!ix86_tls_symbol) { const char *sym @@ -26128,23 +26150,15 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, (AGU and load ports). Try to account for this by scaling the construction cost by the number of elements involved. 
*/ if ((kind == vec_construct || kind == vec_to_scalar) - && ((stmt_info - && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type - || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type) - && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE - && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) + && ((node + && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE + || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP + && SLP_TREE_LANES (node) == 1)) + && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF + (SLP_TREE_REPRESENTATIVE (node)))) != INTEGER_CST)) - || (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) - == VMAT_GATHER_SCATTER))) - || (node - && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE - || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP - && SLP_TREE_LANES (node) == 1)) - && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF - (SLP_TREE_REPRESENTATIVE (node)))) - != INTEGER_CST)) - || (SLP_TREE_MEMORY_ACCESS_TYPE (node) - == VMAT_GATHER_SCATTER))))) + || (SLP_TREE_MEMORY_ACCESS_TYPE (node) + == VMAT_GATHER_SCATTER))))) { stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1); diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h index d897763..5fc8665 100644 --- a/gcc/config/loongarch/loongarch.h +++ b/gcc/config/loongarch/loongarch.h @@ -823,8 +823,6 @@ typedef struct { #define CASE_VECTOR_MODE Pmode -#define CASE_VECTOR_SHORTEN_MODE(MIN, MAX, BODY) Pmode - /* Define this as 1 if `char' should by default be signed; else as 0. */ #ifndef DEFAULT_SIGNED_CHAR #define DEFAULT_SIGNED_CHAR 1 diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt index d326ca4..9796839 100644 --- a/gcc/config/nvptx/nvptx.opt +++ b/gcc/config/nvptx/nvptx.opt @@ -120,6 +120,51 @@ Target RejectNegative Alias(misa=,sm_89) march-map=sm_90a Target RejectNegative Alias(misa=,sm_89) +march-map=sm_100 +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_100f +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_100a +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_101 +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_101f +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_101a +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_103 +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_103f +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_103a +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_120 +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_120f +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_120a +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_121 +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_121f +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_121a +Target RejectNegative Alias(misa=,sm_89) + Enum Name(ptx_version) Type(enum ptx_version) Known PTX ISA versions (for use with the -mptx= option): diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index df924fa..1c6bc25 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -275,13 +275,13 @@ loop_invariant_op_p (class loop *loop, /* Return true if the variable should be counted into liveness. 
*/ static bool variable_vectorized_p (class loop *loop, stmt_vec_info stmt_info, - slp_tree node ATTRIBUTE_UNUSED, tree var, bool lhs_p) + slp_tree node, tree var, bool lhs_p) { if (!var) return false; gimple *stmt = STMT_VINFO_STMT (stmt_info); stmt_info = vect_stmt_to_vectorize (stmt_info); - enum stmt_vec_info_type type = STMT_VINFO_TYPE (stmt_info); + enum stmt_vec_info_type type = SLP_TREE_TYPE (node); if (is_gimple_call (stmt) && gimple_call_internal_p (stmt)) { if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE @@ -602,9 +602,9 @@ get_store_value (gimple *stmt) /* Return true if additional vector vars needed. */ bool costs::need_additional_vector_vars_p (stmt_vec_info stmt_info, - slp_tree node ATTRIBUTE_UNUSED) + slp_tree node) { - enum stmt_vec_info_type type = STMT_VINFO_TYPE (stmt_info); + enum stmt_vec_info_type type = SLP_TREE_TYPE (node); if (type == load_vec_info_type || type == store_vec_info_type) { if (STMT_VINFO_GATHER_SCATTER_P (stmt_info) @@ -694,7 +694,7 @@ costs::update_local_live_ranges ( if (!node) continue; - if (STMT_VINFO_TYPE (stmt_info) == undef_vec_info_type) + if (SLP_TREE_TYPE (*node) == undef_vec_info_type) continue; for (j = 0; j < gimple_phi_num_args (phi); j++) @@ -773,7 +773,7 @@ costs::update_local_live_ranges ( slp_tree *node = vinfo_slp_map.get (stmt_info); if (!node) continue; - enum stmt_vec_info_type type = STMT_VINFO_TYPE (stmt_info); + enum stmt_vec_info_type type = SLP_TREE_TYPE (*node); if (need_additional_vector_vars_p (stmt_info, *node)) { /* For non-adjacent load/store STMT, we will potentially diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 16227e5..1c60695 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -5174,6 +5174,7 @@ public: protected: void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info, + slp_tree node, vect_cost_model_location, unsigned int); void density_test (loop_vec_info); void adjust_vect_cost_per_loop (loop_vec_info); @@ -5321,6 +5322,7 @@ rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind, void rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind, stmt_vec_info stmt_info, + slp_tree node, vect_cost_model_location where, unsigned int orig_count) { @@ -5381,12 +5383,12 @@ rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind, or may not need to apply. When finalizing the cost of the loop, the extra penalty is applied when the load density heuristics are satisfied. 
*/ - if (kind == vec_construct && stmt_info - && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type - && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE - || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP)) + if (kind == vec_construct && node + && SLP_TREE_TYPE (node) == load_vec_info_type + && (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE + || SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP)) { - tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree vectype = SLP_TREE_VECTYPE (node); unsigned int nunits = vect_nunits_for_cost (vectype); /* As PR103702 shows, it's possible that vectorizer wants to do costings for only one unit here, it's no need to do any @@ -5415,7 +5417,7 @@ rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind, unsigned rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind, - stmt_vec_info stmt_info, slp_tree, + stmt_vec_info stmt_info, slp_tree node, tree vectype, int misalign, vect_cost_model_location where) { @@ -5433,7 +5435,7 @@ rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind, retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost); m_costs[where] += retval; - update_target_cost_per_stmt (kind, stmt_info, where, orig_count); + update_target_cost_per_stmt (kind, stmt_info, node, where, orig_count); } return retval; diff --git a/gcc/diagnostics/changes.cc b/gcc/diagnostics/changes.cc index 290d602..e1caab0 100644 --- a/gcc/diagnostics/changes.cc +++ b/gcc/diagnostics/changes.cc @@ -1850,8 +1850,13 @@ run_all_tests () } } // namespace diagnostics::changes::selftest + +#endif /* CHECKING_P */ + } // namespace diagnostics::changes +#if CHECKING_P + namespace selftest { // diagnostics::selftest /* Run all of the selftests within this file. 
*/ @@ -1863,6 +1868,7 @@ changes_cc_tests () } } // namespace selftest -} // namespace diagnostics #endif /* CHECKING_P */ + +} // namespace diagnostics diff --git a/gcc/diagnostics/context.cc b/gcc/diagnostics/context.cc index 0dbc148..85f7d2a 100644 --- a/gcc/diagnostics/context.cc +++ b/gcc/diagnostics/context.cc @@ -2130,10 +2130,11 @@ context_cc_tests () } } // namespace diagnostics::selftest -} // namespace diagnostics #endif /* #if CHECKING_P */ +} // namespace diagnostics + #if __GNUC__ >= 10 # pragma GCC diagnostic pop #endif diff --git a/gcc/diagnostics/html-sink.cc b/gcc/diagnostics/html-sink.cc index 07e7187..13d6309 100644 --- a/gcc/diagnostics/html-sink.cc +++ b/gcc/diagnostics/html-sink.cc @@ -1702,6 +1702,7 @@ html_sink_cc_tests () } } // namespace selftest -} // namespace diagnostics #endif /* CHECKING_P */ + +} // namespace diagnostics diff --git a/gcc/diagnostics/output-spec.cc b/gcc/diagnostics/output-spec.cc index 08128a9..83f128c 100644 --- a/gcc/diagnostics/output-spec.cc +++ b/gcc/diagnostics/output-spec.cc @@ -846,6 +846,7 @@ output_spec_cc_tests () } } // namespace diagnostics::selftest -} // namespace diagnostics #endif /* #if CHECKING_P */ + +} // namespace diagnostics diff --git a/gcc/diagnostics/sarif-sink.cc b/gcc/diagnostics/sarif-sink.cc index 05c0a8e..4738ae9 100644 --- a/gcc/diagnostics/sarif-sink.cc +++ b/gcc/diagnostics/sarif-sink.cc @@ -5072,6 +5072,7 @@ sarif_sink_cc_tests () } } // namespace diagnostics::selftest -} // namespace diagnostics #endif /* CHECKING_P */ + +} // namespace diagnostics diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256-2.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256-2.c new file mode 100644 index 0000000..7350fd9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256-2.c @@ -0,0 +1,49 @@ +/* { dg-additional-options "-mavx2" { target avx2_runtime } } */ + +#include "tree-vect.h" + +#define B 0 +#define G 1 +#define R 2 + +int red = 153; +int green = 66; +int blue = 187; + +static void __attribute__((noipa)) +sub_left_prediction_bgr32(int *restrict dst, int *restrict src) +{ + for (int i = 0; i < 8; i++) { + int rt = src[i * 3 + R]; + int gt = src[i * 3 + G]; + int bt = src[i * 3 + B]; + + dst[i * 3 + R] = rt - red; + dst[i * 3 + G] = gt - green; + dst[i * 3 + B] = bt - blue; + + red = rt; + green = gt; + blue = bt; + } +} + +int main() +{ + int dst[8*3]; + int src[8*3] = { 160, 73, 194, 17, 33, 99, 0, 12, 283, 87, 73, 11, + 9, 7, 1, 23, 19, 13, 77, 233, 97, 78, 2, 5 }; + int dst2[8*3] = {-27, 7, 41, -143, -40, -95, -17, -21, 184, 87, 61, + -272, -78, -66, -10, 14, 12, 12, 54, 214, 84, 1, -231, -92}; + + check_vect (); + + sub_left_prediction_bgr32(dst, src); + +#pragma GCC novector + for (int i = 0; i < 8*3; ++i) + if (dst[i] != dst2[i]) + __builtin_abort(); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256.c new file mode 100644 index 0000000..c895e94 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-pr121256.c @@ -0,0 +1,54 @@ +/* { dg-additional-options "-mavx2" { target avx2_runtime } } */ + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include "tree-vect.h" + +#define B 0 +#define G 1 +#define R 2 +#define A 3 + +int red = 153; +int green = 66; +int blue = 187; +int alpha = 255; + +static void __attribute__((noipa)) +sub_left_prediction_bgr32(uint8_t *restrict dst, uint8_t *restrict src, int w) +{ + for (int i = 0; i < 8; i++) { + int rt = src[i * 4 + R]; + int gt = src[i * 4 + G]; + int bt = 
src[i * 4 + B]; + int at = src[i * 4 + A]; + + dst[i * 4 + R] = rt - red; + dst[i * 4 + G] = gt - green; + dst[i * 4 + B] = bt - blue; + dst[i * 4 + A] = at - alpha; + + red = rt; + green = gt; + blue = bt; + alpha = at; + } +} + +int main() +{ + check_vect (); + + uint8_t *dst = calloc(36, sizeof(uint8_t)); + uint8_t *src = calloc(36, sizeof(uint8_t)); + + src[R] = 160; + src[G] = 73; + src[B] = 194; + src[A] = 255; + + sub_left_prediction_bgr32(dst, src, 33); + if (dst[R] != 7 || dst[B] != 7 || dst[A] != 0) + __builtin_abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/pr121208-1a.c b/gcc/testsuite/gcc.target/i386/pr121208-1a.c new file mode 100644 index 0000000..ac851cb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121208-1a.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fPIC -mtls-dialect=gnu" } */ + +extern __thread int bar; +extern void func (void); + +__attribute__((no_caller_saved_registers)) +void +foo (int error) +{ + bar = 1; /* { dg-error -mtls-dialect=gnu2 } */ + if (error == 0) + func (); + bar = 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr121208-1b.c b/gcc/testsuite/gcc.target/i386/pr121208-1b.c new file mode 100644 index 0000000..b97ac71 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121208-1b.c @@ -0,0 +1,4 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fPIC -mtls-dialect=gnu2" } */ + +#include "pr121208-1a.c" diff --git a/gcc/testsuite/gcc.target/i386/pr121208-2a.c b/gcc/testsuite/gcc.target/i386/pr121208-2a.c new file mode 100644 index 0000000..c1891ae --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121208-2a.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fPIC -mtls-dialect=gnu" } */ + +typedef unsigned int uword_t __attribute__ ((mode (__word__))); +extern __thread int bar; +extern void func (void); + +__attribute__((target("general-regs-only"))) +__attribute__((interrupt)) +void +foo (void *frame, uword_t error) +{ + bar = 1; /* { dg-error -mtls-dialect=gnu2 } */ + if (error == 0) + func (); + bar = 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr121208-2b.c b/gcc/testsuite/gcc.target/i386/pr121208-2b.c new file mode 100644 index 0000000..269b120 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121208-2b.c @@ -0,0 +1,4 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fPIC -mtls-dialect=gnu2" } */ + +#include "pr121208-2a.c" diff --git a/gcc/testsuite/gcc.target/i386/pr121208-3a.c b/gcc/testsuite/gcc.target/i386/pr121208-3a.c new file mode 100644 index 0000000..26fe687 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121208-3a.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fPIC -mtls-dialect=gnu" } */ + +typedef unsigned int uword_t __attribute__ ((mode (__word__))); +extern __thread int bar; +extern void func (void); + +__attribute__((target("general-regs-only"))) +__attribute__((interrupt)) +void +foo (void *frame) +{ + bar = 1; /* { dg-error -mtls-dialect=gnu2 } */ + if (frame == 0) + func (); + bar = 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr121208-3b.c b/gcc/testsuite/gcc.target/i386/pr121208-3b.c new file mode 100644 index 0000000..b672d75 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121208-3b.c @@ -0,0 +1,4 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fPIC -mtls-dialect=gnu2" } */ + +#include "pr121208-3a.c" diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_100.c 
b/gcc/testsuite/gcc.target/nvptx/march-map=sm_100.c new file mode 100644 index 0000000..e759a11 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_100.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_100 -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_100a.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_100a.c new file mode 100644 index 0000000..153ed1e --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_100a.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_100a -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_100f.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_100f.c new file mode 100644 index 0000000..9bb9127 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_100f.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_100f -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_101.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_101.c new file mode 100644 index 0000000..06b3ceb --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_101.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_101 -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_101a.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_101a.c new file mode 100644 index 0000000..0cca3f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_101a.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_101a -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { 
scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_101f.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_101f.c new file mode 100644 index 0000000..9548be5 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_101f.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_101f -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_103.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_103.c new file mode 100644 index 0000000..5731249 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_103.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_103 -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_103a.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_103a.c new file mode 100644 index 0000000..aea501e --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_103a.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_103a -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_103f.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_103f.c new file mode 100644 index 0000000..59d8987 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_103f.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_103f -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_120.c 
b/gcc/testsuite/gcc.target/nvptx/march-map=sm_120.c new file mode 100644 index 0000000..d28a671 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_120.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_120 -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_120a.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_120a.c new file mode 100644 index 0000000..613dd65 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_120a.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_120a -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_120f.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_120f.c new file mode 100644 index 0000000..1b23350 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_120f.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_120f -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_121.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_121.c new file mode 100644 index 0000000..240332b --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_121.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_121 -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_121a.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_121a.c new file mode 100644 index 0000000..1e7fb70 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_121a.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_121a -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { 
scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_121f.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_121f.c new file mode 100644 index 0000000..2cbec51 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_121f.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_121f -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 2ee023f..80b5a0a 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -1919,7 +1919,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared, for (gcond *cond : info->conds) { stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (cond); - STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type; /* Mark the statement as a condition. */ STMT_VINFO_DEF_TYPE (loop_cond_info) = vect_condition_def; } @@ -1936,9 +1935,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared, if (info->inner_loop_cond) { - stmt_vec_info inner_loop_cond_info - = loop_vinfo->lookup_stmt (info->inner_loop_cond); - STMT_VINFO_TYPE (inner_loop_cond_info) = loop_exit_ctrl_vec_info_type; /* If we have an estimate on the number of iterations of the inner loop use that to limit the scale for costing, otherwise use --param vect-inner-loop-cost-factor literally. */ @@ -7151,7 +7147,7 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, } /* Transform via vect_transform_reduction. */ - STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; + SLP_TREE_TYPE (slp_node) = reduc_vec_info_type; return true; } @@ -7253,18 +7249,17 @@ vectorizable_reduction (loop_vec_info loop_vinfo, } /* Analysis for double-reduction is done on the outer loop PHI, nested cycles have no further restrictions. */ - STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type; + SLP_TREE_TYPE (slp_node) = cycle_phi_info_type; } else - STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; + SLP_TREE_TYPE (slp_node) = reduc_vec_info_type; return true; } - stmt_vec_info orig_stmt_of_analysis = stmt_info; stmt_vec_info phi_info = stmt_info; if (!is_a <gphi *> (stmt_info->stmt)) { - STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; + SLP_TREE_TYPE (slp_node) = reduc_vec_info_type; return true; } if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) @@ -8074,7 +8069,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, && reduction_type == FOLD_LEFT_REDUCTION) dump_printf_loc (MSG_NOTE, vect_location, "using an in-order (fold-left) reduction.\n"); - STMT_VINFO_TYPE (orig_stmt_of_analysis) = cycle_phi_info_type; + SLP_TREE_TYPE (slp_node) = cycle_phi_info_type; /* All but single defuse-cycle optimized and fold-left reductions go through their own vectorizable_* routines. 
*/ @@ -8770,7 +8765,7 @@ vectorizable_lc_phi (loop_vec_info loop_vinfo, return false; } - STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type; + SLP_TREE_TYPE (slp_node) = lc_phi_info_type; return true; } @@ -8855,7 +8850,7 @@ vectorizable_phi (vec_info *, if (gimple_phi_num_args (as_a <gphi *> (stmt_info->stmt)) > 1) record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), vector_stmt, stmt_info, vectype, 0, vect_body); - STMT_VINFO_TYPE (stmt_info) = phi_info_type; + SLP_TREE_TYPE (slp_node) = phi_info_type; return true; } @@ -8970,6 +8965,33 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, return false; } + /* We need to be able to build a { ..., a, b } init vector with + dist number of distinct trailing values. Always possible + when dist == 1 or when nunits is constant or when the initializations + are uniform. */ + tree uniform_initval = NULL_TREE; + edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); + for (stmt_vec_info s : SLP_TREE_SCALAR_STMTS (slp_node)) + { + gphi *phi = as_a <gphi *> (s->stmt); + if (! uniform_initval) + uniform_initval = PHI_ARG_DEF_FROM_EDGE (phi, pe); + else if (! operand_equal_p (uniform_initval, + PHI_ARG_DEF_FROM_EDGE (phi, pe))) + { + uniform_initval = NULL_TREE; + break; + } + } + if (!uniform_initval && !nunits.is_constant ()) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "cannot build initialization vector for " + "first order recurrence\n"); + return false; + } + /* First-order recurrence autovectorization needs to handle permutation with indices = [nunits-1, nunits, nunits+1, ...]. */ vec_perm_builder sel (nunits, 1, 3); @@ -9016,25 +9038,42 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, "prologue_cost = %d .\n", inside_cost, prologue_cost); - STMT_VINFO_TYPE (stmt_info) = recurr_info_type; + SLP_TREE_TYPE (slp_node) = recurr_info_type; return true; } - edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); - basic_block bb = gimple_bb (phi); - tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe); - if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (preheader))) + tree vec_init; + if (! uniform_initval) { - gimple_seq stmts = NULL; - preheader = gimple_convert (&stmts, TREE_TYPE (vectype), preheader); - gsi_insert_seq_on_edge_immediate (pe, stmts); + vec<constructor_elt, va_gc> *v = NULL; + vec_alloc (v, nunits.to_constant ()); + for (unsigned i = 0; i < nunits.to_constant () - dist; ++i) + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, + build_zero_cst (TREE_TYPE (vectype))); + for (stmt_vec_info s : SLP_TREE_SCALAR_STMTS (slp_node)) + { + gphi *phi = as_a <gphi *> (s->stmt); + tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe); + if (!useless_type_conversion_p (TREE_TYPE (vectype), + TREE_TYPE (preheader))) + { + gimple_seq stmts = NULL; + preheader = gimple_convert (&stmts, + TREE_TYPE (vectype), preheader); + gsi_insert_seq_on_edge_immediate (pe, stmts); + } + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, preheader); + } + vec_init = build_constructor (vectype, v); } - tree vec_init = build_vector_from_val (vectype, preheader); + else + vec_init = uniform_initval; vec_init = vect_init_vector (loop_vinfo, stmt_info, vec_init, vectype, NULL); /* Create the vectorized first-order PHI node. 
*/ tree vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_recur_"); + basic_block bb = gimple_bb (phi); gphi *new_phi = create_phi_node (vec_dest, bb); add_phi_arg (new_phi, vec_init, pe, UNKNOWN_LOCATION); @@ -9552,7 +9591,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, "prologue_cost = %d. \n", inside_cost, prologue_cost); - STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type; + SLP_TREE_TYPE (slp_node) = induc_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_nonlinear_induction"); return true; } @@ -9853,7 +9892,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, "prologue_cost = %d .\n", inside_cost, prologue_cost); - STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type; + SLP_TREE_TYPE (slp_node) = induc_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_induction"); return true; } diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index f0ddbf9..ffb320f 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -130,7 +130,6 @@ vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt, STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info; STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (orig_stmt_info); - STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info); if (!STMT_VINFO_VECTYPE (pattern_stmt_info)) { gcc_assert (!vectype diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index c0636d8..cb27d16 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -130,6 +130,8 @@ _slp_tree::_slp_tree () this->failed = NULL; this->max_nunits = 1; this->lanes = 0; + SLP_TREE_TYPE (this) = undef_vec_info_type; + this->u.undef = NULL; } /* Tear down a SLP node. */ @@ -8259,8 +8261,7 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, /* Masked loads can have an undefined (default SSA definition) else operand. We do not need to cost it. */ vec<tree> ops = SLP_TREE_SCALAR_OPS (child); - if ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node)) - == load_vec_info_type) + if (SLP_TREE_TYPE (node) == load_vec_info_type && ((ops.length () && TREE_CODE (ops[0]) == SSA_NAME && SSA_NAME_IS_DEFAULT_DEF (ops[0]) @@ -8271,8 +8272,7 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, /* For shifts with a scalar argument we don't need to cost or code-generate anything. ??? Represent this more explicitely. */ - gcc_assert ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node)) - == shift_vec_info_type) + gcc_assert (SLP_TREE_TYPE (node) == shift_vec_info_type && j == 1); continue; } @@ -11308,9 +11308,9 @@ vect_schedule_slp_node (vec_info *vinfo, si = gsi_for_stmt (last_stmt_info->stmt); } else if (SLP_TREE_CODE (node) != VEC_PERM_EXPR - && (STMT_VINFO_TYPE (stmt_info) == cycle_phi_info_type - || STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type - || STMT_VINFO_TYPE (stmt_info) == phi_info_type)) + && (SLP_TREE_TYPE (node) == cycle_phi_info_type + || SLP_TREE_TYPE (node) == induc_vec_info_type + || SLP_TREE_TYPE (node) == phi_info_type)) { /* For PHI node vectorization we do not use the insertion iterator. */ si = gsi_none (); @@ -11330,8 +11330,7 @@ vect_schedule_slp_node (vec_info *vinfo, last scalar def here. 
*/ if (SLP_TREE_VEC_DEFS (child).is_empty ()) { - gcc_assert (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (child)) - == cycle_phi_info_type); + gcc_assert (SLP_TREE_TYPE (child) == cycle_phi_info_type); gphi *phi = as_a <gphi *> (vect_find_last_scalar_stmt_in_slp (child)->stmt); if (!last_stmt) @@ -11482,7 +11481,7 @@ vect_schedule_slp_node (vec_info *vinfo, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "------>vectorizing SLP permutation node\n"); - /* ??? the transform kind is stored to STMT_VINFO_TYPE which might + /* ??? the transform kind was stored to STMT_VINFO_TYPE which might be shared with different SLP nodes (but usually it's the same operation apart from the case the stmt is only there for denoting the actual scalar lane defs ...). So do not call vect_transform_stmt diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index aa2657a..9edc4a8 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -1422,7 +1422,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, int group_size, vect_memory_access_type memory_access_type, - gather_scatter_info *gs_info, + const gather_scatter_info *gs_info, tree scalar_mask, vec<int> *elsvals = nullptr) { @@ -2771,7 +2771,7 @@ static gimple * vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype, gimple_stmt_iterator *gsi, - gather_scatter_info *gs_info, + const gather_scatter_info *gs_info, tree ptr, tree offset, tree mask) { tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl)); @@ -2869,7 +2869,7 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info, static gimple * vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - gather_scatter_info *gs_info, + const gather_scatter_info *gs_info, tree ptr, tree offset, tree oprnd, tree mask) { tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl)); @@ -2950,8 +2950,8 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info, containing loop. 
*/ static void -vect_get_gather_scatter_ops (class loop *loop, - slp_tree slp_node, gather_scatter_info *gs_info, +vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node, + const gather_scatter_info *gs_info, tree *dataref_ptr, vec<tree> *vec_offset) { gimple_seq stmts = NULL; @@ -2979,7 +2979,7 @@ static void vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype, loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, - gather_scatter_info *gs_info, + const gather_scatter_info *gs_info, tree *dataref_bump, tree *vec_offset, vec_loop_lens *loop_lens) { @@ -3158,7 +3158,7 @@ vectorizable_bswap (vec_info *vinfo, return false; } - STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; + SLP_TREE_TYPE (slp_node) = call_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_bswap"); record_stmt_cost (cost_vec, 1, vector_stmt, stmt_info, 0, vect_prologue); @@ -3487,7 +3487,7 @@ vectorizable_call (vec_info *vinfo, "incompatible vector types for invariants\n"); return false; } - STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; + SLP_TREE_TYPE (slp_node) = call_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_call"); vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); @@ -4282,7 +4282,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } - STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type; + SLP_TREE_TYPE (slp_node) = call_simd_clone_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_simd_clone_call"); /* vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); */ return true; @@ -5427,13 +5427,13 @@ vectorizable_conversion (vec_info *vinfo, DUMP_VECT_SCOPE ("vectorizable_conversion"); if (modifier == NONE) { - STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; + SLP_TREE_TYPE (slp_node) = type_conversion_vec_info_type; vect_model_simple_cost (vinfo, (1 + multi_step_cvt), slp_node, cost_vec); } else if (modifier == NARROW_SRC || modifier == NARROW_DST) { - STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; + SLP_TREE_TYPE (slp_node) = type_demotion_vec_info_type; /* The final packing step produces one vector result per copy. */ unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); vect_model_promotion_demotion_cost (stmt_info, dt, nvectors, @@ -5442,7 +5442,7 @@ vectorizable_conversion (vec_info *vinfo, } else { - STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; + SLP_TREE_TYPE (slp_node) = type_promotion_vec_info_type; /* The initial unpacking step produces two vector results per copy. MULTI_STEP_CVT is 0 for a single conversion, so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). 
*/ @@ -5777,7 +5777,7 @@ vectorizable_assignment (vec_info *vinfo, "incompatible vector types for invariants\n"); return false; } - STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; + SLP_TREE_TYPE (slp_node) = assignment_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_assignment"); if (!vect_nop_conversion_p (stmt_info)) vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); @@ -6122,7 +6122,7 @@ vectorizable_shift (vec_info *vinfo, gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i]) == INTEGER_CST)); } - STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type; + SLP_TREE_TYPE (slp_node) = shift_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_shift"); vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); return true; @@ -6541,7 +6541,7 @@ vectorizable_operation (vec_info *vinfo, return false; } - STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; + SLP_TREE_TYPE (slp_node) = op_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_operation"); vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); if (using_emulated_vectors_p) @@ -7974,7 +7974,7 @@ vectorizable_store (vec_info *vinfo, dump_printf_loc (MSG_NOTE, vect_location, "Vectorizing an unaligned access.\n"); - STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; + SLP_TREE_TYPE (slp_node) = store_vec_info_type; } gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (stmt_info)); @@ -9572,7 +9572,7 @@ vectorizable_load (vec_info *vinfo, if (memory_access_type == VMAT_LOAD_STORE_LANES) vinfo->any_known_not_updated_vssa = true; - STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; + SLP_TREE_TYPE (slp_node) = load_vec_info_type; } else { @@ -11749,7 +11749,7 @@ vectorizable_condition (vec_info *vinfo, } } - STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; + SLP_TREE_TYPE (slp_node) = condition_vec_info_type; vect_model_simple_cost (vinfo, 1, slp_node, cost_vec, kind); return true; } @@ -12256,7 +12256,7 @@ vectorizable_comparison (vec_info *vinfo, return false; if (cost_vec) - STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type; + SLP_TREE_TYPE (slp_node) = comparison_vec_info_type; return true; } @@ -12657,8 +12657,8 @@ vect_analyze_stmt (vec_info *vinfo, /* Stmts that are (also) "live" (i.e. - that are used out of the loop) need extra handling, except for vectorizable reductions. 
*/ if (!bb_vinfo - && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type - && (STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type + && SLP_TREE_TYPE (node) != reduc_vec_info_type + && (SLP_TREE_TYPE (node) != lc_phi_info_type || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def) && (!node->ldst_lanes || SLP_TREE_CODE (node) == VEC_PERM_EXPR) && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo), @@ -12694,7 +12694,7 @@ vect_transform_stmt (vec_info *vinfo, tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info); STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node); - switch (STMT_VINFO_TYPE (stmt_info)) + switch (SLP_TREE_TYPE (slp_node)) { case type_demotion_vec_info_type: case type_promotion_vec_info_type: @@ -12811,7 +12811,7 @@ vect_transform_stmt (vec_info *vinfo, done = true; } - if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type + if (SLP_TREE_TYPE (slp_node) != store_vec_info_type && (!slp_node->ldst_lanes || SLP_TREE_CODE (slp_node) == VEC_PERM_EXPR)) { diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc index 066c8a8..f992856 100644 --- a/gcc/tree-vectorizer.cc +++ b/gcc/tree-vectorizer.cc @@ -715,7 +715,6 @@ vec_info::new_stmt_vec_info (gimple *stmt) stmt_vec_info res = XCNEW (class _stmt_vec_info); res->stmt = stmt; - STMT_VINFO_TYPE (res) = undef_vec_info_type; STMT_VINFO_RELEVANT (res) = vect_unused_in_scope; STMT_VINFO_VECTORIZABLE (res) = true; STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index e8be608..4a1e4fc 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -205,6 +205,32 @@ enum vect_memory_access_type { VMAT_GATHER_SCATTER }; +/*-----------------------------------------------------------------*/ +/* Info on vectorized defs. */ +/*-----------------------------------------------------------------*/ +enum stmt_vec_info_type { + undef_vec_info_type = 0, + load_vec_info_type, + store_vec_info_type, + shift_vec_info_type, + op_vec_info_type, + call_vec_info_type, + call_simd_clone_vec_info_type, + assignment_vec_info_type, + condition_vec_info_type, + comparison_vec_info_type, + reduc_vec_info_type, + induc_vec_info_type, + type_promotion_vec_info_type, + type_demotion_vec_info_type, + type_conversion_vec_info_type, + cycle_phi_info_type, + lc_phi_info_type, + phi_info_type, + recurr_info_type, + loop_exit_ctrl_vec_info_type +}; + /************************************************************************ SLP ************************************************************************/ @@ -279,6 +305,13 @@ struct _slp_tree { for loop vectorization. */ vect_memory_access_type memory_access_type; + /* The kind of operation as determined by analysis and a tagged + union with kind specific data. */ + enum stmt_vec_info_type type; + union { + void *undef; + } u; + /* If not NULL this is a cached failed SLP discovery attempt with the lanes that failed during SLP discovery as 'false'. This is a copy of the matches array. */ @@ -364,6 +397,7 @@ public: #define SLP_TREE_LANES(S) (S)->lanes #define SLP_TREE_CODE(S) (S)->code #define SLP_TREE_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type +#define SLP_TREE_TYPE(S) (S)->type enum vect_partial_vector_style { vect_partial_vectors_none, @@ -1211,32 +1245,6 @@ public: #define BB_VINFO_DATAREFS(B) (B)->shared->datarefs #define BB_VINFO_DDRS(B) (B)->shared->ddrs -/*-----------------------------------------------------------------*/ -/* Info on vectorized defs. 
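The tree-vectorizer.h change here (the new SLP_TREE_TYPE field and kind-specific union above, the removal of the old per-stmt field just below) moves the operation kind from the shared stmt_vec_info onto each SLP node; as the comment in vect_schedule_slp_node notes, a stmt_vec_info can be referenced by more than one SLP node. A minimal standalone model of that design choice, with invented type and function names, not GCC code:

/* Illustrative model only: a per-statement tag can be clobbered when two
   SLP nodes share the same representative statement, while a per-node tag
   cannot.  All names below are invented for the sketch.  */
#include <cassert>

enum node_kind { undef_kind, load_kind, lane_def_kind };

struct stmt_info { const char *text; };      /* shared per scalar stmt */

struct slp_node
{
  stmt_info *representative;                 /* possibly shared */
  node_kind kind = undef_kind;               /* set during analysis */
};

static void
analyze (slp_node &n, node_kind k)
{
  n.kind = k;
}

int
main ()
{
  stmt_info s{"x_3 = *p_2"};                 /* one statement ... */
  slp_node a{&s}, b{&s};                     /* ... referenced by two nodes */
  analyze (a, load_kind);                    /* used as an actual load here */
  analyze (b, lane_def_kind);                /* only supplies scalar lane defs */
  /* With the kind stored on the node, the two analyses do not interfere;
     a field on 's' would keep only whichever analysis ran last.  */
  assert (a.kind == load_kind && b.kind == lane_def_kind);
  return 0;
}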
*/ -/*-----------------------------------------------------------------*/ -enum stmt_vec_info_type { - undef_vec_info_type = 0, - load_vec_info_type, - store_vec_info_type, - shift_vec_info_type, - op_vec_info_type, - call_vec_info_type, - call_simd_clone_vec_info_type, - assignment_vec_info_type, - condition_vec_info_type, - comparison_vec_info_type, - reduc_vec_info_type, - induc_vec_info_type, - type_promotion_vec_info_type, - type_demotion_vec_info_type, - type_conversion_vec_info_type, - cycle_phi_info_type, - lc_phi_info_type, - phi_info_type, - recurr_info_type, - loop_exit_ctrl_vec_info_type -}; - /* Indicates whether/how a variable is used in the scope of loop/basic block. */ enum vect_relevant { @@ -1329,8 +1337,6 @@ typedef struct data_reference *dr_p; class _stmt_vec_info { public: - enum stmt_vec_info_type type; - /* Indicates whether this stmts is part of a computation whose result is used outside the loop. */ bool live; @@ -1569,7 +1575,6 @@ struct gather_scatter_info { }; /* Access Functions. */ -#define STMT_VINFO_TYPE(S) (S)->type #define STMT_VINFO_STMT(S) (S)->stmt #define STMT_VINFO_RELEVANT(S) (S)->relevant #define STMT_VINFO_LIVE_P(S) (S)->live diff --git a/libstdc++-v3/include/bits/stl_iterator_base_funcs.h b/libstdc++-v3/include/bits/stl_iterator_base_funcs.h index 637159f..f78e535 100644 --- a/libstdc++-v3/include/bits/stl_iterator_base_funcs.h +++ b/libstdc++-v3/include/bits/stl_iterator_base_funcs.h @@ -130,6 +130,28 @@ _GLIBCXX_END_NAMESPACE_CONTAINER __distance(_OutputIterator, _OutputIterator, output_iterator_tag) = delete; #endif +#ifdef __glibcxx_concepts +namespace __detail +{ + // Satisfied if ITER_TRAITS(Iter)::iterator_category is valid and is + // at least as strong as ITER_TRAITS(Iter)::iterator_concept. + template<typename _Iter> + concept __iter_category_converts_to_concept + = convertible_to<typename __iter_traits<_Iter>::iterator_category, + typename __iter_traits<_Iter>::iterator_concept>; + + // Satisfied if the type is a C++20 iterator that defines iterator_concept, + // and its iterator_concept is stronger than its iterator_category (if any). + // Used by std::distance and std::advance to detect iterators which should + // dispatch based on their C++20 concept not their C++17 category. + template<typename _Iter> + concept __promotable_iterator + = input_iterator<_Iter> + && requires { typename __iter_traits<_Iter>::iterator_concept; } + && ! __iter_category_converts_to_concept<_Iter>; +} // namespace __detail +#endif + /** * @brief A generalization of pointer arithmetic. * @param __first An input iterator. @@ -149,6 +171,24 @@ _GLIBCXX_END_NAMESPACE_CONTAINER typename iterator_traits<_InputIterator>::difference_type distance(_InputIterator __first, _InputIterator __last) { +#ifdef __glibcxx_concepts + // A type which satisfies the C++20 random_access_iterator concept might + // have input_iterator_tag as its iterator_category type, which would + // mean we select the O(n) __distance. Or a C++20 std::input_iterator + // that is not a Cpp17InputIterator might have output_iterator_tag as + // its iterator_category type and then calling __distance with + // std::__iterator_category(__first) would be ill-formed. + // So for C++20 iterator types we can just choose to do the right thing. 
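A usage sketch of the case handled by the if constexpr block that follows (and by the analogous std::advance change further down): an adaptor whose C++20 iterator_concept is stronger than its C++17 iterator_category. Assumes C++20; the lambda and values are made up for illustration:

#include <cassert>
#include <iterator>
#include <ranges>
#include <vector>

int main()
{
  std::vector<int> v{0, 1, 2, 3};
  // The lambda returns a prvalue, so this transform_view iterator's
  // iterator_category is input_iterator_tag while its iterator_concept is
  // random_access_iterator_tag -- exactly the __promotable_iterator case.
  auto x = v | std::views::transform([](int i) { return 2 * i; });
  auto e = x.end();
  assert( std::distance(x.begin(), e) == 4 ); // O(1) subtraction, not an O(n) walk
  assert( *std::prev(e) == 6 );               // prev/advance can move backwards
}

Before this change std::distance here dispatched on the weak iterator_category and walked the range, and std::prev, which goes through std::advance with a negative distance, did not work for such iterators at all.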
+ if constexpr (__detail::__promotable_iterator<_InputIterator>) + { + if constexpr (random_access_iterator<_InputIterator>) + return __last - __first; + else + return std::__distance(std::move(__first), std::move(__last), + input_iterator_tag()); + } + else // assume it meets the Cpp17InputIterator requirements: +#endif // concept requirements -- taken care of in __distance return std::__distance(__first, __last, std::__iterator_category(__first)); @@ -221,9 +261,31 @@ _GLIBCXX_END_NAMESPACE_CONTAINER inline _GLIBCXX17_CONSTEXPR void advance(_InputIterator& __i, _Distance __n) { - // concept requirements -- taken care of in __advance - typename iterator_traits<_InputIterator>::difference_type __d = __n; - std::__advance(__i, __d, std::__iterator_category(__i)); +#ifdef __glibcxx_concepts + // A type which satisfies the C++20 bidirectional_iterator concept might + // have input_iterator_tag as its iterator_category type, which would + // mean we select the __advance overload which cannot move backwards. + // A C++20 random_access_iterator we might select the O(n) __advance + // if it doesn't meet the Cpp17RandomAccessIterator requirements. + // So for C++20 iterator types we can just choose to do the right thing. + if constexpr (__detail::__promotable_iterator<_InputIterator> + && ranges::__detail::__is_integer_like<_Distance>) + { + auto __d = static_cast<iter_difference_t<_InputIterator>>(__n); + if constexpr (random_access_iterator<_InputIterator>) + std::__advance(__i, __d, random_access_iterator_tag()); + else if constexpr (bidirectional_iterator<_InputIterator>) + std::__advance(__i, __d, bidirectional_iterator_tag()); + else + std::__advance(__i, __d, input_iterator_tag()); + } + else // assume it meets the Cpp17InputIterator requirements: +#endif + { + // concept requirements -- taken care of in __advance + typename iterator_traits<_InputIterator>::difference_type __d = __n; + std::__advance(__i, __d, std::__iterator_category(__i)); + } } #if __cplusplus >= 201103L diff --git a/libstdc++-v3/include/debug/bitset b/libstdc++-v3/include/debug/bitset index ad9b7b5..e4d3e66 100644 --- a/libstdc++-v3/include/debug/bitset +++ b/libstdc++-v3/include/debug/bitset @@ -164,6 +164,17 @@ namespace __debug _CharT __zero, _CharT __one = _CharT('1')) : _Base(__str, __pos, __n, __zero, __one) { } +#ifdef __cpp_lib_bitset // ... 
from string_view + template<class _CharT, class _Traits> + constexpr explicit + bitset(std::basic_string_view<_CharT, _Traits> __s, + std::basic_string_view<_CharT, _Traits>::size_type __position = 0, + std::basic_string_view<_CharT, _Traits>::size_type __n = + std::basic_string_view<_CharT, _Traits>::npos, + _CharT __zero = _CharT('0'), _CharT __one = _CharT('1')) + : _Base(__s, __position, __n, __zero, __one) { } +#endif + _GLIBCXX23_CONSTEXPR bitset(const _Base& __x) : _Base(__x) { } diff --git a/libstdc++-v3/include/std/inplace_vector b/libstdc++-v3/include/std/inplace_vector index 290cf6e..b5a81be 100644 --- a/libstdc++-v3/include/std/inplace_vector +++ b/libstdc++-v3/include/std/inplace_vector @@ -1354,7 +1354,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } - template<typename _Tp, size_t _Nm, typename _Up> + template<typename _Tp, size_t _Nm, typename _Up = _Tp> constexpr size_t erase(inplace_vector<_Tp, _Nm>& __cont, const _Up& __value) { diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan index 055778d..5e79d4b 100644 --- a/libstdc++-v3/include/std/mdspan +++ b/libstdc++-v3/include/std/mdspan @@ -159,9 +159,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION static constexpr span<const size_t> _S_static_extents(size_t __begin, size_t __end) noexcept - { - return {_Extents.data() + __begin, _Extents.data() + __end}; - } + { return {_Extents.data() + __begin, _Extents.data() + __end}; } constexpr span<const _IndexType> _M_dynamic_extents(size_t __begin, size_t __end) const noexcept @@ -185,10 +183,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION concept __valid_static_extent = _Extent == dynamic_extent || _Extent <= numeric_limits<_IndexType>::max(); - } - namespace __mdspan - { template<typename _Extents> constexpr span<const size_t> __static_extents(size_t __begin = 0, size_t __end = _Extents::rank()) @@ -199,9 +194,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION constexpr span<const typename _Extents::index_type> __dynamic_extents(const _Extents& __exts, size_t __begin = 0, size_t __end = _Extents::rank()) noexcept - { - return __exts._M_exts._M_dynamic_extents(__begin, __end); - } + { return __exts._M_exts._M_dynamic_extents(__begin, __end); } } template<typename _IndexType, size_t... _Extents> @@ -1248,9 +1241,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION [[nodiscard]] constexpr bool empty() const noexcept - { - return __mdspan::__empty(extents()); - } + { return __mdspan::__empty(extents()); } friend constexpr void swap(mdspan& __x, mdspan& __y) noexcept @@ -1299,7 +1290,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION constexpr bool is_strided() const noexcept(noexcept(_M_mapping.is_strided())) - { return _M_mapping. 
is_strided(); } + { return _M_mapping.is_strided(); } constexpr index_type stride(rank_type __r) const { return _M_mapping.stride(__r); } diff --git a/libstdc++-v3/testsuite/23_containers/inplace_vector/erasure.cc b/libstdc++-v3/testsuite/23_containers/inplace_vector/erasure.cc index c7fda09..8fb56e9 100644 --- a/libstdc++-v3/testsuite/23_containers/inplace_vector/erasure.cc +++ b/libstdc++-v3/testsuite/23_containers/inplace_vector/erasure.cc @@ -2,18 +2,38 @@ #include <inplace_vector> #include <testsuite_hooks.h> +#include <span> + +template<typename T, size_t N> +constexpr bool +eq(const std::inplace_vector<T, N>& l, std::span<const T> r) { + if (l.size() != r.size()) + return false; + for (auto i = 0u; i < l.size(); ++i) + if (l[i] != r[i]) + return false; + return true; +}; constexpr void test_erase() { - std::inplace_vector<int, 15> c{1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1, 4, 4, 9}; + std::inplace_vector<int, 15> c{1, 0, 3, 4, 5, 6, 5, 4, 3, 0, 1, 4, 4, 9}; std::erase(c, 4); VERIFY( c.size() == 10 ); std::erase(c, 1); VERIFY( c.size() == 8 ); std::erase(c, 9); VERIFY( c.size() == 7 ); - VERIFY( (c == std::inplace_vector<int, 15>{2, 3, 5, 6, 5, 3, 2}) ); + VERIFY( eq(c, {0, 3, 5, 6, 5, 3, 0}) ); + + std::erase(c, {}); + VERIFY( c.size() == 5 ); + VERIFY( eq(c, {3, 5, 6, 5, 3}) ); + + std::erase(c, {5}); + VERIFY( c.size() == 3 ); + VERIFY( eq(c, {3, 6, 3}) ); std::inplace_vector<int, 0> e; std::erase(e, 10); @@ -29,7 +49,7 @@ test_erase_if() std::erase_if(c, [](int i) { return i == 4; }); VERIFY( c.size() == 8 ); std::erase_if(c, [](int i) { return i & 1; }); - VERIFY( (c == std::inplace_vector<int, 15>{2, 2}) ); + VERIFY( eq(c, {2, 2}) ); std::inplace_vector<int, 0> e; std::erase_if(e, [](int i) { return i > 5; }); diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/accessors/generic.cc b/libstdc++-v3/testsuite/23_containers/mdspan/accessors/generic.cc index c335035..66009ad 100644 --- a/libstdc++-v3/testsuite/23_containers/mdspan/accessors/generic.cc +++ b/libstdc++-v3/testsuite/23_containers/mdspan/accessors/generic.cc @@ -29,44 +29,59 @@ class Base class Derived : public Base { }; -template<template<typename T> typename Accessor> +template<typename RhsAccessor, typename LhsAccessor, bool ExpectConvertible> + constexpr void + check_convertible() + { + RhsAccessor rhs; + [[maybe_unused]] LhsAccessor lhs(rhs); + static_assert(std::is_nothrow_constructible_v<LhsAccessor, RhsAccessor>); + static_assert(std::is_convertible_v<RhsAccessor, LhsAccessor> == ExpectConvertible); + } + +template<template<typename T> typename LhsAccessor, + template<typename T> typename RhsAccessor = LhsAccessor, + bool ExpectConvertible = true> constexpr bool test_ctor() { // T -> T - static_assert(std::is_nothrow_constructible_v<Accessor<double>, - Accessor<double>>); - static_assert(std::is_convertible_v<Accessor<double>, Accessor<double>>); + check_convertible<RhsAccessor<double>, LhsAccessor<double>, + ExpectConvertible>(); // T -> const T - static_assert(std::is_convertible_v<Accessor<double>, - Accessor<const double>>); - static_assert(std::is_convertible_v<Accessor<Derived>, - Accessor<const Derived>>); + check_convertible<RhsAccessor<double>, LhsAccessor<const double>, + ExpectConvertible>(); + check_convertible<RhsAccessor<Derived>, LhsAccessor<const Derived>, + ExpectConvertible>(); // const T -> T - static_assert(!std::is_constructible_v<Accessor<double>, - Accessor<const double>>); - static_assert(!std::is_constructible_v<Accessor<Derived>, - Accessor<const Derived>>); + 
static_assert(!std::is_constructible_v<LhsAccessor<double>, + RhsAccessor<const double>>); + static_assert(!std::is_constructible_v<LhsAccessor<Derived>, + RhsAccessor<const Derived>>); // T <-> volatile T - static_assert(std::is_convertible_v<Accessor<int>, Accessor<volatile int>>); - static_assert(!std::is_constructible_v<Accessor<int>, - Accessor<volatile int>>); + check_convertible<RhsAccessor<int>, LhsAccessor<volatile int>, + ExpectConvertible>(); + static_assert(!std::is_constructible_v<LhsAccessor<int>, + RhsAccessor<volatile int>>); // size difference - static_assert(!std::is_constructible_v<Accessor<char>, Accessor<int>>); + static_assert(!std::is_constructible_v<LhsAccessor<char>, + RhsAccessor<int>>); // signedness - static_assert(!std::is_constructible_v<Accessor<int>, - Accessor<unsigned int>>); - static_assert(!std::is_constructible_v<Accessor<unsigned int>, - Accessor<int>>); + static_assert(!std::is_constructible_v<LhsAccessor<int>, + RhsAccessor<unsigned int>>); + static_assert(!std::is_constructible_v<LhsAccessor<unsigned int>, + RhsAccessor<int>>); // Derived <-> Base - static_assert(!std::is_constructible_v<Accessor<Base>, Accessor<Derived>>); - static_assert(!std::is_constructible_v<Accessor<Derived>, Accessor<Base>>); + static_assert(!std::is_constructible_v<LhsAccessor<Base>, + RhsAccessor<Derived>>); + static_assert(!std::is_constructible_v<LhsAccessor<Derived>, + RhsAccessor<Base>>); return true; } diff --git a/libstdc++-v3/testsuite/24_iterators/operations/cxx20_iterators.cc b/libstdc++-v3/testsuite/24_iterators/operations/cxx20_iterators.cc new file mode 100644 index 0000000..b613c37 --- /dev/null +++ b/libstdc++-v3/testsuite/24_iterators/operations/cxx20_iterators.cc @@ -0,0 +1,60 @@ +// { dg-do run { target c++20 } } + +#include <ranges> +#include <testsuite_iterators.h> +#include <testsuite_hooks.h> + +// Bug 102181 std::advance and std::views::iota<std::int64_t> don't work +void +test_pr102181() +{ +#ifdef __SIZEOF_INT128__ + using type = unsigned __int128; +#else + using type = unsigned long; +#endif + auto v = std::ranges::iota_view(type(0), type(10)); + auto b = v.begin(); + VERIFY( std::distance(b, std::next(b)) == 1 ); + std::advance(b, std::iter_difference_t<decltype(b)>(1)); + VERIFY( *b == 1 ); + VERIFY( std::distance(b, v.end()) == 9 ); +} + +// https://stackoverflow.com/questions/68100775/rangesviewtransform-produces-an-inputiterator-preventing-the-use-of-stdpre +void +test_transform_view_iterator() +{ + int a[] = {0, 1, 2, 3}; + __gnu_test::random_access_container<int> rr(a); + auto rx = std::ranges::views::transform(rr, std::identity{}); + auto re = rx.end(); + VERIFY( *std::prev(re) == 3 ); + VERIFY( std::distance(rx.begin(), re) == 4 ); + + __gnu_test::bidirectional_container<int> br(a); + auto bx = std::ranges::views::transform(br, std::identity{}); + auto be = bx.end(); + VERIFY( *std::prev(be) == 3 ); + VERIFY( std::distance(bx.begin(), be) == 4 ); + + __gnu_test::forward_container<int> fr(a); + auto fx = std::ranges::views::transform(br, std::identity{}); + auto fb = fx.begin(); + VERIFY( *std::next(fb) == 1 ); + VERIFY( std::distance(fb, fx.end()) == 4 ); + + __gnu_test::test_input_range<int> ir(a); + auto ix = std::ranges::views::transform(ir, std::identity{}); + auto ii = ix.begin(); + std::advance(ii, 1); + VERIFY( *ii == 1 ); + // N.B. cannot use std::distance or std::next here because there is no + // iterator_traits<decltype(ii)>::difference_type for this iterator. 
+} + +int main() +{ + test_pr102181(); + test_transform_view_iterator(); +}
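Two of the libstdc++ hunks above are easiest to read from the user's side. The __debug::bitset constructor forwards the basic_string_view overload (guarded by __cpp_lib_bitset), and the _Up = _Tp default on the inplace_vector erase() overload is what allows a braced-init-list as the value: a braced-init-list has no type of its own, so without the default _Up could not be deduced. That is what the updated erasure.cc test exercises with std::erase(c, {}) and std::erase(c, {5}). A small usage sketch, assuming a library new enough to provide <inplace_vector> and to define __cpp_lib_bitset for the string_view constructor:

#include <bitset>
#include <cassert>
#include <inplace_vector>
#include <string_view>

int main()
{
#ifdef __cpp_lib_bitset
  // Construct directly from a string_view, no temporary std::string needed.
  std::bitset<4> b(std::string_view("1010"));
  assert( b.to_ulong() == 0b1010 );
#endif

  std::inplace_vector<int, 8> c{1, 0, 2, 0, 3};
  std::erase(c, {});   // {} binds to a value-initialized int, erasing the zeros
  std::erase(c, {2});  // single-element braced-init-list, erases the 2
  assert( c.size() == 2 && c[0] == 1 && c[1] == 3 );
}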