diff options
54 files changed, 1272 insertions, 134 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 264bbd2..5b54c5a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,74 @@ +2025-04-21 Jan Hubicka <hubicka@ucw.cz> + + PR target/119879 + * config/i386/i386.cc (fp_conversion_stmt_cost): Inline to ... + (ix86_vector_costs::add_stmt_cost): ... here; fix handling of NOP_EXPR. + +2025-04-21 Matthew Fortune <matthew.fortune@imgtec.com> + + * config/mips/mips.cc (mips_option_override): Error out for + -mmicromips -mmsa. + +2025-04-21 Andrew Pinski <quic_apinski@quicinc.com> + + PR middle-end/119507 + * except.cc (switch_to_exception_section): Don't use the cached section if + the current function is in comdat. + +2025-04-21 Andrew Pinski <quic_apinski@quicinc.com> + + * vec.h (array_slice::begin): Assert that the + slice is valid. + (array_slice::end): Likewise. + +2025-04-21 hongtao.liu <hongtao.liu@intel.com> + + * config/i386/i386-expand.cc (ix86_emit_swdivsf): Generate 2 + FMA instructions when TARGET_FMA. + +2025-04-19 Jeff Law <jlaw@ventanamicro.com> + + PR target/119865 + * config/riscv/riscv.cc (parse_features_for_version): Do not + explicitly free the architecture string. + +2025-04-19 Jeff Law <jlaw@ventanamicro.com> + + PR target/118410 + * config/riscv/bitmanip.md (logical with constant argument): New + splitter for cases where synthesizing ~C is cheaper than synthesizing + the original constant C. + +2025-04-19 Jan Hubicka <hubicka@ucw.cz> + + * config/i386/i386.cc (vec_fp_conversion_cost): New function. + (ix86_rtx_costs): Use it for SSE/AVX FP conversoins. + (ix86_builtin_vectorization_cost): Fix indentation; + and use vec_fp_conversion_cost in vec_promote_demote. + (fp_conversion_stmt_cost): New function. + (ix86_vector_costs::add_stmt_cost): Use it to cost NOP_EXPR + and vec_promote_demote. + * config/i386/i386.h (struct processor_costs): + * config/i386/x86-tune-costs.h (struct processor_costs): + +2025-04-19 Andrew Pinski <quic_apinski@quicinc.com> + + PR rtl-optimization/111949 + * combine.cc (find_split_point): Add a split point + for `(and (not X) Y)` if not in the outer set already. + +2025-04-19 Jiaxun Yang <jiaxun.yang@flygoat.com> + + PR target/111814 + * config/sh/sh-modes.def (RESET_FLOAT_FORMAT): Use mips format. + (FLOAT_MODE): Use mips mode. + +2025-04-19 Maciej W. Rozycki <macro@orcam.me.uk> + + * config/alpha/alpha.cc + (alpha_get_mem_rtx_alignment_and_offset): Recurse into + COMPONENT_REF nodes. + 2025-04-18 Jeff Law <jlaw@ventanamicro.com> * config/riscv/bitmanip.md (*bext<mode>_mask_pos): New pattern diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index f0d1b43..fa0255d 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20250419 +20250422 diff --git a/gcc/combine.cc b/gcc/combine.cc index e118608..873c2bd 100644 --- a/gcc/combine.cc +++ b/gcc/combine.cc @@ -5280,6 +5280,12 @@ find_split_point (rtx *loc, rtx_insn *insn, bool set_src) SUBST (XEXP (x, 0), XEXP (x, 1)); SUBST (XEXP (x, 1), tem); } + /* Many targets have a `(and (not X) Y)` and/or `(ior (not X) Y)` instructions. + Split at that insns. However if this is + the SET_SRC, we likely do not have such an instruction and it's + worthless to try this split. */ + if (!set_src && GET_CODE (XEXP (x, 0)) == NOT) + return loc; break; case PLUS: diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc index ba470d9..14e7da5 100644 --- a/gcc/config/alpha/alpha.cc +++ b/gcc/config/alpha/alpha.cc @@ -4291,14 +4291,10 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o) tree mem = MEM_EXPR (expr); if (mem != NULL_TREE) - switch (TREE_CODE (mem)) - { - case MEM_REF: - tree_offset = mem_ref_offset (mem).force_shwi (); - tree_align = get_object_alignment (get_base_address (mem)); - break; + { + HOST_WIDE_INT comp_offset = 0; - case COMPONENT_REF: + for (; TREE_CODE (mem) == COMPONENT_REF; mem = TREE_OPERAND (mem, 0)) { tree byte_offset = component_ref_field_offset (mem); tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1)); @@ -4307,14 +4303,15 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o) || !poly_int_tree_p (byte_offset, &offset) || !tree_fits_shwi_p (bit_offset)) break; - tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT; + comp_offset += offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT; } - tree_align = get_object_alignment (get_base_address (mem)); - break; - default: - break; - } + if (TREE_CODE (mem) == MEM_REF) + { + tree_offset = comp_offset + mem_ref_offset (mem).force_shwi (); + tree_align = get_object_alignment (get_base_address (mem)); + } + } if (reg_align > mem_align) { diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index cdfd94d..36f71eb 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -19256,8 +19256,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) e1 = gen_reg_rtx (mode); x1 = gen_reg_rtx (mode); - /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ - b = force_reg (mode, b); /* x0 = rcp(b) estimate */ @@ -19270,20 +19268,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), UNSPEC_RCP))); - /* e0 = x0 * b */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); + unsigned vector_size = GET_MODE_SIZE (mode); + + /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a + N-R step with 2 fma implementation. */ + if (TARGET_FMA + || (TARGET_AVX512F && vector_size == 64) + || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))) + { + /* e0 = x0 * a */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); + /* e1 = e0 * b - a */ + emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b, + gen_rtx_NEG (mode, a)))); + /* res = - e1 * x0 + e0 */ + emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode, + gen_rtx_NEG (mode, e1), + x0, e0))); + } + else + /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ + { + /* e0 = x0 * b */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); - /* e0 = x0 * e0 */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); + /* e1 = x0 + x0 */ + emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); - /* e1 = x0 + x0 */ - emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); + /* e0 = x0 * e0 */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); - /* x1 = e1 - e0 */ - emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); + /* x1 = e1 - e0 */ + emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); - /* res = a * x1 */ - emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + /* res = a * x1 */ + emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + } } /* Output code to perform a Newton-Rhapson approximation of a diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 38df84f..d15f91d 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -100,6 +100,7 @@ along with GCC; see the file COPYING3. If not see #include "i386-features.h" #include "function-abi.h" #include "rtl-error.h" +#include "gimple-pretty-print.h" /* This file should be included last. */ #include "target-def.h" @@ -21816,6 +21817,25 @@ ix86_insn_cost (rtx_insn *insn, bool speed) return insn_cost + pattern_cost (PATTERN (insn), speed); } +/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */ + +static int +vec_fp_conversion_cost (const struct processor_costs *cost, int size) +{ + if (size < 128) + return cost->cvtss2sd; + else if (size < 256) + { + if (TARGET_SSE_SPLIT_REGS) + return cost->cvtss2sd * size / 64; + return cost->cvtss2sd; + } + if (size < 512) + return cost->vcvtps2pd256; + else + return cost->vcvtps2pd512; +} + /* Compute a (partial) cost for rtx X. Return true if the complete cost has been computed, and false if subexpressions should be scanned. In either case, *TOTAL contains the cost result. */ @@ -22479,17 +22499,18 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, return false; case FLOAT_EXTEND: + /* x87 represents all values extended to 80bit. */ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) *total = 0; else - *total = ix86_vec_cost (mode, cost->addss); + *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); return false; case FLOAT_TRUNCATE: if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) *total = cost->fadd; else - *total = ix86_vec_cost (mode, cost->addss); + *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); return false; case ABS: @@ -24683,7 +24704,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, switch (type_of_cost) { case scalar_stmt: - return fp ? ix86_cost->addss : COSTS_N_INSNS (1); + return fp ? ix86_cost->addss : COSTS_N_INSNS (1); case scalar_load: /* load/store costs are relative to register move which is 2. Recompute @@ -24754,7 +24775,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, return ix86_cost->cond_not_taken_branch_cost; case vec_perm: + return ix86_vec_cost (mode, ix86_cost->sse_op); + case vec_promote_demote: + if (fp) + return vec_fp_conversion_cost (ix86_tune_cost, mode); return ix86_vec_cost (mode, ix86_cost->sse_op); case vec_construct: @@ -25342,6 +25367,9 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) stmt_cost = 0; + else if (fp) + stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); break; case BIT_IOR_EXPR: @@ -25383,6 +25411,29 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, break; } + if (kind == vec_promote_demote + && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) + { + int outer_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)))); + int inner_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))); + int stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end + up doing two conversions and packing them. */ + if (inner_size > outer_size) + { + int n = inner_size / outer_size; + stmt_cost = stmt_cost * n + + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op); + } + } + /* If we do elementwise loads into a vector then we are bound by latency and execution resources for the many scalar loads (AGU and load ports). Try to account for this by scaling the diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 8507243..18aa42d 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -207,6 +207,12 @@ struct processor_costs { const int divsd; /* cost of DIVSD instructions. */ const int sqrtss; /* cost of SQRTSS instructions. */ const int sqrtsd; /* cost of SQRTSD instructions. */ + const int cvtss2sd; /* cost SSE FP conversions, + such as CVTSS2SD. */ + const int vcvtps2pd256; /* cost 256bit packed FP conversions, + such as VCVTPD2PS with larger reg in ymm. */ + const int vcvtps2pd512; /* cost 512bit packed FP conversions, + such as VCVTPD2PS with larger reg in zmm. */ const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp; /* Specify reassociation width for integer, fp, vector integer and vector fp diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 9477345..cddcf61 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -121,16 +121,19 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (2), /* cost of FCHS instruction. */ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ - COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */ - COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */ - COSTS_N_BYTES (2), /* cost of MULSS instruction. */ - COSTS_N_BYTES (2), /* cost of MULSD instruction. */ - COSTS_N_BYTES (2), /* cost of FMA SS instruction. */ - COSTS_N_BYTES (2), /* cost of FMA SD instruction. */ - COSTS_N_BYTES (2), /* cost of DIVSS instruction. */ - COSTS_N_BYTES (2), /* cost of DIVSD instruction. */ - COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */ - COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */ + COSTS_N_BYTES (4), /* cost of cheap SSE instruction. */ + COSTS_N_BYTES (4), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_BYTES (4), /* cost of MULSS instruction. */ + COSTS_N_BYTES (4), /* cost of MULSD instruction. */ + COSTS_N_BYTES (4), /* cost of FMA SS instruction. */ + COSTS_N_BYTES (4), /* cost of FMA SD instruction. */ + COSTS_N_BYTES (4), /* cost of DIVSS instruction. */ + COSTS_N_BYTES (4), /* cost of DIVSD instruction. */ + COSTS_N_BYTES (4), /* cost of SQRTSS instruction. */ + COSTS_N_BYTES (4), /* cost of SQRTSD instruction. */ + COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */ + COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ ix86_size_memcpy, ix86_size_memset, @@ -243,6 +246,9 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (88), /* cost of DIVSD instruction. */ COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i386_memcpy, i386_memset, @@ -356,6 +362,9 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (74), /* cost of DIVSD instruction. */ COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i486_memcpy, i486_memset, @@ -467,6 +476,9 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (39), /* cost of DIVSD instruction. */ COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -571,6 +583,9 @@ struct processor_costs lakemont_cost = { COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -690,6 +705,9 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (18), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentiumpro_memcpy, pentiumpro_memset, @@ -800,6 +818,9 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (47), /* cost of DIVSD instruction. */ COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ geode_memcpy, geode_memset, @@ -913,6 +934,9 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (56), /* cost of DIVSD instruction. */ COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k6_memcpy, k6_memset, @@ -1027,6 +1051,9 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (24), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ athlon_memcpy, athlon_memset, @@ -1150,6 +1177,9 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k8_memcpy, k8_memset, @@ -1281,6 +1311,9 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ amdfam10_memcpy, amdfam10_memset, @@ -1405,6 +1438,9 @@ const struct processor_costs bdver_cost = { COSTS_N_INSNS (27), /* cost of DIVSD instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ bdver_memcpy, bdver_memset, @@ -1553,6 +1589,10 @@ struct processor_costs znver1_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + /* Real latency is 4, but for split regs multiply cost of half op by 2. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests @@ -1712,6 +1752,9 @@ struct processor_costs znver2_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1847,6 +1890,9 @@ struct processor_costs znver3_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1984,6 +2030,10 @@ struct processor_costs znver4_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + /* Real latency is 6, but for split regs multiply cost of half op by 2. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -2135,6 +2185,9 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ /* DIVSD has throughtput 0.13 and latency 20. */ COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ /* Zen5 can execute: - integer ops: 6 per cycle, at most 3 multiplications. latency 1 for additions, 3 for multiplications (pipelined) @@ -2274,6 +2327,9 @@ struct processor_costs skylake_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ skylake_memcpy, skylake_memset, @@ -2403,6 +2459,9 @@ struct processor_costs icelake_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ icelake_memcpy, icelake_memset, @@ -2526,6 +2585,9 @@ struct processor_costs alderlake_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ alderlake_memcpy, alderlake_memset, @@ -2642,6 +2704,9 @@ const struct processor_costs btver1_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver1_memcpy, btver1_memset, @@ -2755,6 +2820,9 @@ const struct processor_costs btver2_cost = { COSTS_N_INSNS (19), /* cost of DIVSD instruction. */ COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver2_memcpy, btver2_memset, @@ -2867,6 +2935,9 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (38), /* cost of DIVSD instruction. */ COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium4_memcpy, pentium4_memset, @@ -2982,6 +3053,9 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (40), /* cost of DIVSD instruction. */ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ nocona_memcpy, nocona_memset, @@ -3095,6 +3169,9 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ atom_memcpy, atom_memset, @@ -3208,6 +3285,9 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (69), /* cost of DIVSD instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ slm_memcpy, slm_memset, @@ -3335,6 +3415,9 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ tremont_memcpy, tremont_memset, @@ -3448,6 +3531,9 @@ struct processor_costs intel_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ intel_memcpy, intel_memset, @@ -3566,6 +3652,9 @@ struct processor_costs lujiazui_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (60), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ lujiazui_memcpy, lujiazui_memset, @@ -3682,6 +3771,9 @@ struct processor_costs yongfeng_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ yongfeng_memcpy, yongfeng_memset, @@ -3798,6 +3890,9 @@ struct processor_costs shijidadao_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (11), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ shijidadao_memcpy, shijidadao_memset, @@ -3922,6 +4017,9 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ generic_memcpy, generic_memset, @@ -4051,6 +4149,9 @@ struct processor_costs core_cost = { COSTS_N_INSNS (32), /* cost of DIVSD instruction. */ COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ core_memcpy, core_memset, diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc index 24a28dc..0d3d026 100644 --- a/gcc/config/mips/mips.cc +++ b/gcc/config/mips/mips.cc @@ -20678,6 +20678,9 @@ mips_option_override (void) "-mcompact-branches=never"); } + if (is_micromips && TARGET_MSA) + error ("unsupported combination: %s", "-mmicromips -mmsa"); + /* Require explicit relocs for MIPS R6 onwards. This enables simplification of the compact branch and jump support through the backend. */ if (!TARGET_EXPLICIT_RELOCS && mips_isa_rev >= 6) diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 2a3884c..d0919ec 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -1263,3 +1263,41 @@ expand_crc_using_clmul (<SUBX:MODE>mode, <SUBX1:MODE>mode, operands); DONE; }) + +;; If we have an XOR/IOR with a constant operand (C) and the we can +;; synthesize ~C more efficiently than C, then synthesize ~C and use +;; xnor/orn instead. +;; +;; The same can be done for AND, but mvconst_internal's issues get in +;; the way. That's future work. +(define_split + [(set (match_operand:X 0 "register_operand") + (any_or:X (match_operand:X 1 "register_operand") + (match_operand:X 2 "const_int_operand"))) + (clobber (match_operand:X 3 "register_operand"))] + "TARGET_ZBB + && (riscv_const_insns (operands[2], true) + > riscv_const_insns (GEN_INT (~INTVAL (operands[2])), true))" + [(const_int 0)] +{ + /* Get the inverted constant into the temporary register. */ + riscv_emit_move (operands[3], GEN_INT (~INTVAL (operands[2]))); + + /* For xnor, the NOT operation is in a different position. So + we have to customize the split code we generate a bit. + + It is expected that AND will be handled like IOR in the future. */ + if (<CODE> == XOR) + { + rtx x = gen_rtx_XOR (<X:MODE>mode, operands[1], operands[3]); + x = gen_rtx_NOT (<X:MODE>mode, x); + emit_insn (gen_rtx_SET (operands[0], x)); + } + else + { + rtx x = gen_rtx_NOT (<X:MODE>mode, operands[3]); + x = gen_rtx_IOR (<X:MODE>mode, x, operands[1]); + emit_insn (gen_rtx_SET (operands[0], x)); + } + DONE; +}) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index d3656a7..bad59e2 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -13136,9 +13136,6 @@ parse_features_for_version (tree decl, DECL_SOURCE_LOCATION (decl)); gcc_assert (parse_res); - if (arch_string != default_opts->x_riscv_arch_string) - free (CONST_CAST (void *, (const void *) arch_string)); - cl_target_option_restore (&global_options, &global_options_set, &cur_target); } diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 51eb64f..3ab4d76 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -2136,18 +2136,34 @@ (match_operand 7 "const_int_operand") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (vec_duplicate:V_VLS - (match_operand:<VEL> 3 "direct_broadcast_operand")) + ;; (vec_duplicate:V_VLS ;; wrapper activated by wrap_vec_dup below. + (match_operand:<VEL> 3 "direct_broadcast_operand") ;; ) (match_operand:V_VLS 2 "vector_merge_operand")))] "TARGET_VECTOR" { /* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f has better chances to do vsetvl fusion in vsetvl pass. */ + bool wrap_vec_dup = true; + rtx vec_cst = NULL_RTX; if (riscv_vector::splat_to_scalar_move_p (operands)) { operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode); operands[3] = force_reg (<VEL>mode, operands[3]); } + else if (immediate_operand (operands[3], <VEL>mode) + && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3])) + && (/* -> pred_broadcast<mode>_zero */ + (vector_least_significant_set_mask_operand (operands[1], + <VM>mode) + && vector_const_0_operand (vec_cst, <MODE>mode)) + || (/* pred_broadcast<mode>_imm */ + vector_all_trues_mask_operand (operands[1], <VM>mode) + && vector_const_int_or_double_0_operand (vec_cst, + <MODE>mode)))) + { + operands[3] = vec_cst; + wrap_vec_dup = false; + } /* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar. */ else if (satisfies_constraint_Wdm (operands[3])) { @@ -2191,6 +2207,8 @@ ; else operands[3] = force_reg (<VEL>mode, operands[3]); + if (wrap_vec_dup) + operands[3] = gen_rtx_VEC_DUPLICATE (<MODE>mode, operands[3]); }) (define_insn_and_split "*pred_broadcast<mode>" diff --git a/gcc/config/sh/sh-modes.def b/gcc/config/sh/sh-modes.def index 80650b4..e31ae69 100644 --- a/gcc/config/sh/sh-modes.def +++ b/gcc/config/sh/sh-modes.def @@ -17,6 +17,12 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +/* SH has the same reversed quiet bit as MIPS. */ +RESET_FLOAT_FORMAT (SF, mips_single_format); +RESET_FLOAT_FORMAT (DF, mips_double_format); +/* TFmode: IEEE quad floating point (software). */ +FLOAT_MODE (TF, 16, mips_quad_format); + /* Vector modes. */ VECTOR_MODE (INT, QI, 2); /* V2QI */ VECTOR_MODES (INT, 4); /* V4QI V2HI */ diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 644b36a..e85a710 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,31 @@ +2025-04-21 Jason Merrill <jason@redhat.com> + + * constexpr.cc (cxx_eval_outermost_constant_expr): Move + verify_constant later. + +2025-04-21 Jason Merrill <jason@redhat.com> + + PR c++/118775 + * constexpr.cc (cxx_eval_call_expression): Add assert. + (fold_to_constant): Handle processing_template_decl. + * init.cc (build_new_1): Use fold_to_constant. + +2025-04-21 Jason Merrill <jason@redhat.com> + + PR c++/99456 + * constexpr.cc (cxx_eval_constant_expression): Check strict + instead of manifestly_const_eval. + (maybe_constant_init_1): Be strict for static constexpr vars. + +2025-04-19 Jason Merrill <jason@redhat.com> + + * coroutines.cc (coro_build_expr_stmt) + (coro_build_cvt_void_expr_stmt): Remove. + (build_actor_fn): Use finish_expr_stmt. + * semantics.cc (finish_expr_stmt): Avoid wrapping statement in + EXPR_STMT. + (finish_stmt_expr_expr): Add comment. + 2025-04-17 Jason Merrill <jason@redhat.com> * constexpr.cc (is_valid_constexpr_fn): Improve diagnostic. diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index f56c5c4..8a11e62 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -2956,12 +2956,11 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, gcc_assert (arg0); if (new_op_p) { - /* FIXME: We should not get here; the VERIFY_CONSTANT above - should have already caught it. But currently a conversion - from pointer type to arithmetic type is only considered - non-constant for CONVERT_EXPRs, not NOP_EXPRs. */ if (!tree_fits_uhwi_p (arg0)) { + /* We should not get here; the VERIFY_CONSTANT above + should have already caught it. */ + gcc_checking_assert (false); if (!ctx->quiet) error_at (loc, "cannot allocate array: size not constant"); *non_constant_p = true; @@ -8479,7 +8478,7 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, if (TREE_CODE (t) == CONVERT_EXPR && ARITHMETIC_TYPE_P (type) && INDIRECT_TYPE_P (TREE_TYPE (op)) - && ctx->manifestly_const_eval == mce_true) + && ctx->strict) { if (!ctx->quiet) error_at (loc, @@ -9228,11 +9227,6 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant, if (r == void_node && !constexpr_dtor && ctx.ctor) r = ctx.ctor; - if (!constexpr_dtor) - verify_constant (r, allow_non_constant, &non_constant_p, &overflow_p); - else - DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P (object) = true; - unsigned int i; tree cleanup; /* Evaluate the cleanups. */ @@ -9251,15 +9245,6 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant, non_constant_p = true; } - if (TREE_CODE (r) == CONSTRUCTOR && CONSTRUCTOR_NO_CLEARING (r)) - { - if (!allow_non_constant) - error ("%qE is not a constant expression because it refers to " - "an incompletely initialized variable", t); - TREE_CONSTANT (r) = false; - non_constant_p = true; - } - if (!non_constant_p && cxx_dialect >= cxx20 && !global_ctx.heap_vars.is_empty ()) { @@ -9316,6 +9301,21 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant, non_constant_p = true; } + if (!non_constant_p && !constexpr_dtor) + verify_constant (r, allow_non_constant, &non_constant_p, &overflow_p); + + /* After verify_constant because reduced_constant_expression_p can unset + CONSTRUCTOR_NO_CLEARING. */ + if (!non_constant_p + && TREE_CODE (r) == CONSTRUCTOR && CONSTRUCTOR_NO_CLEARING (r)) + { + if (!allow_non_constant) + error ("%qE is not a constant expression because it refers to " + "an incompletely initialized variable", t); + TREE_CONSTANT (r) = false; + non_constant_p = true; + } + if (non_constant_p) /* If we saw something bad, go back to our argument. The wrapping below is only for the cases of TREE_CONSTANT argument or overflow. */ @@ -9332,13 +9332,17 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant, if (non_constant_p && !allow_non_constant) return error_mark_node; - else if (constexpr_dtor) - return r; else if (non_constant_p && TREE_CONSTANT (r)) r = mark_non_constant (r); else if (non_constant_p) return t; + if (constexpr_dtor) + { + DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P (object) = true; + return r; + } + /* Check we are not trying to return the wrong type. */ if (!same_type_ignoring_top_level_qualifiers_p (type, TREE_TYPE (r))) { @@ -9490,6 +9494,9 @@ fold_simple (tree t) tree fold_to_constant (tree t) { + if (processing_template_decl) + return t; + tree r = fold (t); if (CONSTANT_CLASS_P (r) && !TREE_OVERFLOW (r)) return r; @@ -9747,16 +9754,26 @@ maybe_constant_init_1 (tree t, tree decl, bool allow_non_constant, { /* [basic.start.static] allows constant-initialization of variables with static or thread storage duration even if it isn't required, but we - shouldn't bend the rules the same way for automatic variables. */ + shouldn't bend the rules the same way for automatic variables. + + But still enforce the requirements of constexpr/constinit. + [dcl.constinit] "If a variable declared with the constinit specifier + has dynamic initialization, the program is ill-formed, even if the + implementation would perform that initialization as a static + initialization." */ bool is_static = (decl && DECL_P (decl) && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))); + bool strict = (!is_static + || (decl && DECL_P (decl) + && (DECL_DECLARED_CONSTEXPR_P (decl) + || DECL_DECLARED_CONSTINIT_P (decl)))); if (is_static) manifestly_const_eval = mce_true; if (cp_unevaluated_operand && manifestly_const_eval != mce_true) return fold_to_constant (t); - t = cxx_eval_outermost_constant_expr (t, allow_non_constant, !is_static, + t = cxx_eval_outermost_constant_expr (t, allow_non_constant, strict, manifestly_const_eval, false, decl); } diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index b92d09f..743da06 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -1852,21 +1852,6 @@ coro_build_frame_access_expr (tree coro_ref, tree member_id, bool preserve_ref, return expr; } -/* Helpers to build EXPR_STMT and void-cast EXPR_STMT, common ops. */ - -static tree -coro_build_expr_stmt (tree expr, location_t loc) -{ - return maybe_cleanup_point_expr_void (build_stmt (loc, EXPR_STMT, expr)); -} - -static tree -coro_build_cvt_void_expr_stmt (tree expr, location_t loc) -{ - tree t = build1 (CONVERT_EXPR, void_type_node, expr); - return coro_build_expr_stmt (t, loc); -} - /* Helpers to build an artificial var, with location LOC, NAME and TYPE, in CTX, and with initializer INIT. */ @@ -2582,8 +2567,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, tree hfa = build_new_method_call (ash, hfa_m, &args, NULL_TREE, LOOKUP_NORMAL, NULL, tf_warning_or_error); r = cp_build_init_expr (ash, hfa); - r = coro_build_cvt_void_expr_stmt (r, loc); - add_stmt (r); + finish_expr_stmt (r); release_tree_vector (args); /* Now we know the real promise, and enough about the frame layout to @@ -2678,8 +2662,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, we must tail call them. However, some targets do not support indirect tail calls to arbitrary callees. See PR94359. */ CALL_EXPR_TAILCALL (resume) = true; - resume = coro_build_cvt_void_expr_stmt (resume, loc); - add_stmt (resume); + finish_expr_stmt (resume); r = build_stmt (loc, RETURN_EXPR, NULL); gcc_checking_assert (maybe_cleanup_point_expr_void (r) == r); diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc index e589e45..062a493 100644 --- a/gcc/cp/init.cc +++ b/gcc/cp/init.cc @@ -3405,7 +3405,7 @@ build_new_1 (vec<tree, va_gc> **placement, tree type, tree nelts, errval = throw_bad_array_new_length (); if (outer_nelts_check != NULL_TREE) size = build3 (COND_EXPR, sizetype, outer_nelts_check, size, errval); - size = cp_fully_fold (size); + size = fold_to_constant (size); /* Create the argument list. */ vec_safe_insert (*placement, 0, size); /* Do name-lookup to find the appropriate operator. */ @@ -3462,7 +3462,7 @@ build_new_1 (vec<tree, va_gc> **placement, tree type, tree nelts, outer_nelts_check = NULL_TREE; } - size = cp_fully_fold (size); + size = fold_to_constant (size); /* If size is zero e.g. due to type having zero size, try to preserve outer_nelts for constant expression evaluation purposes. */ diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 7f23efd..1aa35d3 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -1180,10 +1180,13 @@ finish_expr_stmt (tree expr) expr = error_mark_node; /* Simplification of inner statement expressions, compound exprs, - etc can result in us already having an EXPR_STMT. */ + etc can result in us already having an EXPR_STMT or other statement + tree. Don't wrap them in EXPR_STMT. */ if (TREE_CODE (expr) != CLEANUP_POINT_EXPR) { - if (TREE_CODE (expr) != EXPR_STMT) + if (TREE_CODE (expr) != EXPR_STMT + && !STATEMENT_CLASS_P (expr) + && TREE_CODE (expr) != STATEMENT_LIST) expr = build_stmt (loc, EXPR_STMT, expr); expr = maybe_cleanup_point_expr_void (expr); } @@ -3082,6 +3085,7 @@ finish_stmt_expr_expr (tree expr, tree stmt_expr) } else if (processing_template_decl) { + /* Not finish_expr_stmt because we don't want convert_to_void. */ expr = build_stmt (input_location, EXPR_STMT, expr); expr = add_stmt (expr); /* Mark the last statement so that we can recognize it as such at diff --git a/gcc/except.cc b/gcc/except.cc index 205811c..0fe1e09 100644 --- a/gcc/except.cc +++ b/gcc/except.cc @@ -2949,7 +2949,14 @@ switch_to_exception_section (const char * ARG_UNUSED (fnname)) { section *s; - if (exception_section) + if (exception_section + /* Don't use the cached section for comdat if it will be different. */ +#ifdef HAVE_LD_EH_GC_SECTIONS + && !(targetm_common.have_named_sections + && DECL_COMDAT_GROUP (current_function_decl) + && HAVE_COMDAT_GROUP) +#endif + ) s = exception_section; else { diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 1c45bdb..56325a9 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,10 @@ +2025-04-19 Steven G. Kargl <kargl@gcc.gnu.org> + + PR fortran/119836 + * resolve.cc (check_pure_function): Fix checking for + an impure subprogram within a DO CONCURRENT construct. + (pure_subroutine): Ditto. + 2025-04-16 Harald Anlauf <anlauf@gmx.de> PR fortran/106948 diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc index 2ecbd50..f03708e 100644 --- a/gcc/fortran/resolve.cc +++ b/gcc/fortran/resolve.cc @@ -3260,14 +3260,30 @@ static bool check_pure_function (gfc_expr *e) gfc_do_concurrent_flag = 0 when the check for an impure function occurs. Check the stack to see if the source code has a nested BLOCK construct. */ + for (stack = cs_base; stack; stack = stack->prev) { - if (stack->current->op == EXEC_BLOCK) saw_block = true; + if (!saw_block && stack->current->op == EXEC_BLOCK) + { + saw_block = true; + continue; + } + if (saw_block && stack->current->op == EXEC_DO_CONCURRENT) { - gfc_error ("Reference to impure function at %L inside a " - "DO CONCURRENT", &e->where); - return false; + bool is_pure; + is_pure = (e->value.function.isym + && (e->value.function.isym->pure + || e->value.function.isym->elemental)) + || (e->value.function.esym + && (e->value.function.esym->attr.pure + || e->value.function.esym->attr.elemental)); + if (!is_pure) + { + gfc_error ("Reference to impure function at %L inside a " + "DO CONCURRENT", &e->where); + return false; + } } } @@ -3663,16 +3679,29 @@ pure_subroutine (gfc_symbol *sym, const char *name, locus *loc) /* A BLOCK construct within a DO CONCURRENT construct leads to gfc_do_concurrent_flag = 0 when the check for an impure subroutine - occurs. Check the stack to see if the source code has a nested - BLOCK construct. */ + occurs. Walk up the stack to see if the source code has a nested + construct. */ + for (stack = cs_base; stack; stack = stack->prev) { - if (stack->current->op == EXEC_BLOCK) saw_block = true; + if (stack->current->op == EXEC_BLOCK) + { + saw_block = true; + continue; + } + if (saw_block && stack->current->op == EXEC_DO_CONCURRENT) { - gfc_error ("Subroutine call at %L in a DO CONCURRENT block " - "is not PURE", loc); - return false; + + bool is_pure = true; + is_pure = sym->attr.pure || sym->attr.elemental; + + if (!is_pure) + { + gfc_error ("Subroutine call at %L in a DO CONCURRENT block " + "is not PURE", loc); + return false; + } } } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4cdc9c1..4c219bd 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,78 @@ +2025-04-21 Jason Merrill <jason@redhat.com> + + PR c++/118775 + * g++.dg/cpp2a/constexpr-new24.C: Adjust diagnostic. + +2025-04-21 Andrew Bennett <andrew.bennett@imgtec.com> + + * gcc.dg/memcpy-4.c: Remove mips specific code. + * gcc.target/mips/memcpy-2.c: New test. + +2025-04-21 Matthew Fortune <matthew.fortune@imgtec.com> + + * gcc.target/mips/clear-cache-1.c: Also allow jrc. + +2025-04-21 Matthew Fortune <matthew.fortune@imgtec.com> + + * gcc.dg/tree-ssa/ssa-dom-cse-2.c: Do not check output for + MIPS lp64 abi. + +2025-04-21 Andrew Pinski <quic_apinski@quicinc.com> + + PR middle-end/119507 + * g++.dg/eh/pr119507.C: New test. + +2025-04-21 hongtao.liu <hongtao.liu@intel.com> + + * gcc.target/i386/recip-vec-divf-fma.c: New test. + +2025-04-20 H.J. Lu <hjl.tools@gmail.com> + + PR target/117863 + * gcc.dg/rtl/i386/vector_eq-2.c: New test. + * gcc.dg/rtl/i386/vector_eq-3.c: Likewise. + +2025-04-19 Thomas Schwinge <tschwinge@baylibre.com> + + PR testsuite/119508 + * rust/compile/nr2/compile.exp: Disable parallel testing. + +2025-04-19 Co-authored-by: Jeff Law <jlaw@ventanamicro.com> + + PR target/118410 + * gcc.target/riscv/pr118410-1.c: New test. + * gcc.target/riscv/pr118410-2.c: Likewise. + +2025-04-19 Andrew Pinski <quic_apinski@quicinc.com> + + * gcc.dg/pr118947-1.c: Use 1025 as the size of the buf. + * gcc.dg/pr78408-3.c: Likewise. + +2025-04-19 Andrew Pinski <quic_apinski@quicinc.com> + + PR rtl-optimization/111949 + * gcc.target/aarch64/bic-1.c: New test. + +2025-04-19 Jiaxun Yang <jiaxun.yang@flygoat.com> + + PR target/111814 + * gcc.target/sh/pr111814.c: New test. + +2025-04-19 Maciej W. Rozycki <macro@orcam.me.uk> + + * gcc.target/alpha/memcpy-nested-offset-long.c: New file. + * gcc.target/alpha/memcpy-nested-offset-quad.c: New file. + +2025-04-19 Steven G. Kargl <kargl@gcc.gnu.org> + + PR fortran/119836 + * gfortran.dg/do_concurrent_all_clauses.f90: Remove invalid + dg-error test. + * gfortran.dg/pr119836_1.f90: New test. + * gfortran.dg/pr119836_2.f90: New test. + * gfortran.dg/pr119836_3.f90: New test. + * gfortran.dg/pr119836_4.f90: New test. + 2025-04-18 Thomas Schwinge <tschwinge@baylibre.com> PR cobol/119818 diff --git a/gcc/testsuite/g++.dg/cpp2a/constexpr-new24.C b/gcc/testsuite/g++.dg/cpp2a/constexpr-new24.C index ee62f18..17c9f54 100644 --- a/gcc/testsuite/g++.dg/cpp2a/constexpr-new24.C +++ b/gcc/testsuite/g++.dg/cpp2a/constexpr-new24.C @@ -6,14 +6,14 @@ int a; constexpr char * f1 () { - constexpr auto p = new char[(long int) &a]; // { dg-error "size not constant" } + constexpr auto p = new char[(long int) &a]; // { dg-error "conversion from pointer" } return p; } constexpr char * f2 () { - auto p = new char[(long int) &a]; // { dg-error "size not constant" } + auto p = new char[(long int) &a]; // { dg-error "conversion from pointer" } return p; } diff --git a/gcc/testsuite/g++.dg/eh/pr119507.C b/gcc/testsuite/g++.dg/eh/pr119507.C new file mode 100644 index 0000000..c68536f --- /dev/null +++ b/gcc/testsuite/g++.dg/eh/pr119507.C @@ -0,0 +1,19 @@ +// { dg-do compile { target comdat_group } } +// ARM EABI has its own exception handling data handling and does not use gcc_except_table +// { dg-skip-if "!TARGET_EXCEPTION_DATA" { arm_eabi } } +// Force off function sections +// Force on exceptions +// { dg-options "-fno-function-sections -fexceptions" } +// PR middle-end/119507 + + +inline int comdat() { try { throw 1; } catch (int) { return 1; } return 0; } +int another_func_with_exception() { try { throw 1; } catch (int) { return 1; } return 0; } +inline int comdat1() { try { throw 1; } catch (int) { return 1; } return 0; } +int foo() { return comdat() + comdat1(); } + +// Make sure the gcc puts the exception table for both comdat and comdat1 in their own section +// { dg-final { scan-assembler-times ".section\[\t \]\[^\n\]*.gcc_except_table._Z6comdatv" 1 } } +// { dg-final { scan-assembler-times ".section\[\t \]\[^\n\]*.gcc_except_table._Z7comdat1v" 1 } } +// There should be 3 exception tables, +// { dg-final { scan-assembler-times ".section\[\t \]\[^\n\]*.gcc_except_table" 3 } } diff --git a/gcc/testsuite/gcc.dg/memcpy-4.c b/gcc/testsuite/gcc.dg/memcpy-4.c index 4c726f0..b17b369 100644 --- a/gcc/testsuite/gcc.dg/memcpy-4.c +++ b/gcc/testsuite/gcc.dg/memcpy-4.c @@ -1,13 +1,8 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-rtl-expand" } */ +/* { dg-options "-O2" } */ -#ifdef __mips -__attribute__((nomips16)) -#endif void f1 (char *p) { __builtin_memcpy (p, "12345", 5); } - -/* { dg-final { scan-rtl-dump "mem/u.*mem/u" "expand" { target mips*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/pr118947-1.c b/gcc/testsuite/gcc.dg/pr118947-1.c index 70b7f80..8733e8d 100644 --- a/gcc/testsuite/gcc.dg/pr118947-1.c +++ b/gcc/testsuite/gcc.dg/pr118947-1.c @@ -6,10 +6,10 @@ void* aaa(); void* bbb() { - char buf[32] = {}; + char buf[1025] = {}; /* Tha call to aaa should not matter and clobber buf. */ void* ret = aaa(); - __builtin_memcpy(ret, buf, 32); + __builtin_memcpy(ret, buf, sizeof(buf)); return ret; } diff --git a/gcc/testsuite/gcc.dg/pr78408-3.c b/gcc/testsuite/gcc.dg/pr78408-3.c index 3de90d0..5ea5458 100644 --- a/gcc/testsuite/gcc.dg/pr78408-3.c +++ b/gcc/testsuite/gcc.dg/pr78408-3.c @@ -7,8 +7,8 @@ void* aaa(); void* bbb() { void* ret = aaa(); - char buf[32] = {}; - __builtin_memcpy(ret, buf, 32); + char buf[1025] = {}; + __builtin_memcpy(ret, buf, sizeof(buf)); return ret; } diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c new file mode 100644 index 0000000..871d489 --- /dev/null +++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c @@ -0,0 +1,71 @@ +/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-additional-options "-O2 -march=x86-64-v3" } */ + +typedef int v4si __attribute__((vector_size(16))); +typedef int v8si __attribute__((vector_size(32))); +typedef int v2di __attribute__((vector_size(16))); + +v4si __RTL (startwith ("vregs1")) foo1 (void) +{ +(function "foo1" + (insn-chain + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cnote 2 NOTE_INSN_FUNCTION_BEG) + (cinsn 3 (set (reg:V4SI <0>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)]))) + (cinsn 4 (set (reg:V4SI <1>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)]))) + (cinsn 5 (set (reg:V4SI <2>) + (eq:V4SI (reg:V4SI <0>) (reg:V4SI <1>)))) + (cinsn 6 (set (reg:V4SI <3>) (reg:V4SI <2>))) + (cinsn 7 (set (reg:V4SI xmm0) (reg:V4SI <3>))) + (edge-to exit (flags "FALLTHRU")) + ) + ) + (crtl (return_rtx (reg/i:V4SI xmm0))) +) +} + +v8si __RTL (startwith ("vregs1")) foo2 (void) +{ +(function "foo2" + (insn-chain + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cnote 2 NOTE_INSN_FUNCTION_BEG) + (cinsn 3 (set (reg:V8SI <0>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)]))) + (cinsn 4 (set (reg:V8SI <1>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)]))) + (cinsn 5 (set (reg:V8SI <2>) + (eq:V8SI (reg:V8SI <0>) (reg:V8SI <1>)))) + (cinsn 6 (set (reg:V8SI <3>) (reg:V8SI <2>))) + (cinsn 7 (set (reg:V8SI xmm0) (reg:V8SI <3>))) + (edge-to exit (flags "FALLTHRU")) + ) + ) + (crtl (return_rtx (reg/i:V8SI xmm0))) +) +} + +v2di __RTL (startwith ("vregs1")) foo3 (void) +{ +(function "foo3" + (insn-chain + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cnote 2 NOTE_INSN_FUNCTION_BEG) + (cinsn 3 (set (reg:V2DI <0>) (const_vector:V2DI [(const_int -1) (const_int -1)]))) + (cinsn 4 (set (reg:V2DI <1>) (const_vector:V2DI [(const_int -1) (const_int -1)]))) + (cinsn 5 (set (reg:V2DI <2>) + (eq:V2DI (reg:V2DI <0>) (reg:V2DI <1>)))) + (cinsn 6 (set (reg:V2DI <3>) (reg:V2DI <2>))) + (cinsn 7 (set (reg:V2DI xmm0) (reg:V2DI <3>))) + (edge-to exit (flags "FALLTHRU")) + ) + ) + (crtl (return_rtx (reg/i:V2DI xmm0))) +) +} + +/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */ diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c new file mode 100644 index 0000000..276c4c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c @@ -0,0 +1,74 @@ +/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-additional-options "-O2 -march=x86-64-v3" } */ + +typedef int v4si __attribute__((vector_size(16))); +typedef int v8si __attribute__((vector_size(32))); +typedef int v2di __attribute__((vector_size(16))); + +v4si __RTL (startwith ("vregs1")) foo1 (void) +{ +(function "foo1" + (insn-chain + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cnote 2 NOTE_INSN_FUNCTION_BEG) + (cinsn 3 (set (reg:V4SI <1>) + (mem:V4SI (reg:SI di) [0 ptr S128 A128]))) + (cinsn 4 (set (reg:V4SI <2>) + (eq:V4SI (reg:V4SI <1>) + (mem:V4SI (reg:SI di) [0 ptr S128 A128])))) + (cinsn 5 (set (reg:V4SI <3>) (reg:V4SI <2>))) + (cinsn 6 (set (reg:V4SI xmm0) (reg:V4SI <3>))) + (edge-to exit (flags "FALLTHRU")) + ) + ) + (crtl (return_rtx (reg/i:V4SI xmm0))) +) +} + +v8si __RTL (startwith ("vregs1")) foo2 (void) +{ +(function "foo2" + (insn-chain + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cnote 2 NOTE_INSN_FUNCTION_BEG) + (cinsn 3 (set (reg:V8SI <1>) + (mem:V8SI (reg:SI di) [0 ptr S256 A256]))) + (cinsn 4 (set (reg:V8SI <2>) + (eq:V8SI (mem:V8SI (reg:SI di) [0 ptr S256 A256]) + (reg:V8SI <1>)))) + (cinsn 5 (set (reg:V8SI <3>) (reg:V8SI <2>))) + (cinsn 6 (set (reg:V8SI xmm0) (reg:V8SI <3>))) + (edge-to exit (flags "FALLTHRU")) + ) + ) + (crtl (return_rtx (reg/i:V8SI xmm0))) +) +} + +v2di __RTL (startwith ("vregs1")) foo3 (void) +{ +(function "foo3" + (insn-chain + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cnote 2 NOTE_INSN_FUNCTION_BEG) + (cinsn 3 (set (reg:V2DI <1>) + (mem:V2DI (reg:SI di) [0 ptr S128 A128]))) + (cinsn 4 (set (reg:V2DI <2>) + (eq:V2DI (reg:V2DI <1>) + (mem:V2DI (reg:SI di) [0 ptr S128 A128])))) + (cinsn 5 (set (reg:V2DI <3>) (reg:V2DI <2>))) + (cinsn 6 (set (reg:V2DI xmm0) (reg:V2DI <3>))) + (edge-to exit (flags "FALLTHRU")) + ) + ) + (crtl (return_rtx (reg/i:V2DI xmm0))) +) +} + +/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c index a879d30..6fa52f6 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c @@ -27,4 +27,4 @@ foo () but the loop reads only one element at a time, and DOM cannot resolve these. The same happens on powerpc depending on the SIMD support available. */ -/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* nvptx*-*-* mmix-knuth-mmixware } || { { { lp64 && { powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */ +/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* nvptx*-*-* mmix-knuth-mmixware } || { { { lp64 && { mips*-*-* powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/bic-1.c b/gcc/testsuite/gcc.target/aarch64/bic-1.c new file mode 100644 index 0000000..65e1514 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/bic-1.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* PR rtl-optmization/111949 */ + +/* +**func1: +** bic w([0-9]+), w0, w1 +** and w0, w\1, 1 +** ret +*/ + +unsigned func1(unsigned a, bool b) +{ + int c = a & b; + return (c ^ a)&1; +} + +/* +**func2: +** bic w([0-9]+), w1, w0 +** and w0, w\1, 255 +** ret +*/ +unsigned func2(bool a, bool b) +{ + return ~a & b; +} + +/* +**func3: +** bic w([0-9]+), w1, w0 +** and w0, w\1, 1 +** ret +*/ +bool func3(bool a, unsigned char b) +{ + return !a & b; +} diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-long.c b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-long.c new file mode 100644 index 0000000..631d14f --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-long.c @@ -0,0 +1,76 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +typedef unsigned int __attribute__ ((mode (DI))) int64_t; +typedef unsigned int __attribute__ ((mode (SI))) int32_t; + +typedef union + { + int32_t l[8]; + } +val; + +typedef struct + { + int32_t l[2]; + val v; + } +tre; + +typedef struct + { + int32_t l[3]; + tre t; + } +due; + +typedef struct + { + val v; + int64_t q; + int32_t l[2]; + due d; + } +uno; + +void +memcpy_nested_offset_long (uno *u) +{ + u->d.t.v = u->v; +} + +/* Expect assembly such as: + + ldq $4,0($16) + ldq $3,8($16) + ldq $2,16($16) + srl $4,32,$7 + ldq $1,24($16) + srl $3,32,$6 + stl $4,68($16) + srl $2,32,$5 + stl $7,72($16) + srl $1,32,$4 + stl $3,76($16) + stl $6,80($16) + stl $2,84($16) + stl $5,88($16) + stl $1,92($16) + stl $4,96($16) + + that is with four quadword loads at offsets 0, 8, 16, 24 each and + eight longword stores at offsets 68, 72, 76, 80, 84, 88, 92, 96 each. */ + +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,0\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,8\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,16\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,24\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,68\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,72\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,76\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,80\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,84\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,88\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,92\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,96\\\(\\\$16\\\)\\s" 1 } } */ diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-quad.c b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-quad.c new file mode 100644 index 0000000..1d2227e --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-quad.c @@ -0,0 +1,64 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +typedef unsigned int __attribute__ ((mode (DI))) int64_t; +typedef unsigned int __attribute__ ((mode (SI))) int32_t; + +typedef union + { + int32_t l[8]; + } +val; + +typedef struct + { + int32_t l[2]; + val v; + } +tre; + +typedef struct + { + int32_t l[3]; + tre t; + } +due; + +typedef struct + { + val v; + int64_t q; + int32_t l[3]; + due d; + } +uno; + +void +memcpy_nested_offset_quad (uno *u) +{ + u->d.t.v = u->v; +} + +/* Expect assembly such as: + + ldq $4,0($16) + ldq $3,8($16) + ldq $2,16($16) + ldq $1,24($16) + stq $4,72($16) + stq $3,80($16) + stq $2,88($16) + stq $1,96($16) + + that is with four quadword loads at offsets 0, 8, 16, 24 each + and four quadword stores at offsets 72, 80, 88, 96 each. */ + +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,0\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,8\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,16\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,24\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,72\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,80\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,88\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,96\\\(\\\$16\\\)\\s" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c b/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c new file mode 100644 index 0000000..ad9e07b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mfma -mavx2" } */ +/* { dg-final { scan-assembler-times {(?n)vfn?m(add|sub)[1-3]*ps} 2 } } */ + +typedef float v4sf __attribute__((vector_size(16))); +/* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a */ + +v4sf +foo (v4sf a, v4sf b) +{ + return a / b; +} diff --git a/gcc/testsuite/gcc.target/mips/clear-cache-1.c b/gcc/testsuite/gcc.target/mips/clear-cache-1.c index f1554f5..cd11c66 100644 --- a/gcc/testsuite/gcc.target/mips/clear-cache-1.c +++ b/gcc/testsuite/gcc.target/mips/clear-cache-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-msynci isa_rev>=2" } */ /* { dg-final { scan-assembler "\tsynci\t" } } */ -/* { dg-final { scan-assembler "\tjr.hb\t" } } */ +/* { dg-final { scan-assembler "\tjrc?.hb\t" } } */ /* { dg-final { scan-assembler-not "_flush_cache|mips_sync_icache|_cacheflush" } } */ NOMIPS16 void f() diff --git a/gcc/testsuite/gcc.target/mips/memcpy-2.c b/gcc/testsuite/gcc.target/mips/memcpy-2.c new file mode 100644 index 0000000..df0cd18 --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/memcpy-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "isa_rev<=5 -fdump-rtl-expand" } */ +/* { dg-skip-if "code quality test" { *-*-* } { "-Os" } { "" } } */ + +__attribute__((nomips16)) +void +f1 (char *p) +{ + __builtin_memcpy (p, "12345", 5); +} + +/* { dg-final { scan-rtl-dump "mem/u.*mem/u" "expand" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/power11-3.c b/gcc/testsuite/gcc.target/powerpc/power11-3.c index fa1aedd..56bf881 100644 --- a/gcc/testsuite/gcc.target/powerpc/power11-3.c +++ b/gcc/testsuite/gcc.target/powerpc/power11-3.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mdejagnu-cpu=power8 -O2" } */ +/* { dg-require-ifunc "" } */ /* Check if we can set the power11 target via a target_clones attribute. */ diff --git a/gcc/testsuite/gcc.target/riscv/pr118410-1.c b/gcc/testsuite/gcc.target/riscv/pr118410-1.c new file mode 100644 index 0000000..4a8b847 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr118410-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ +/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64} } } */ +/* { dg-options "-march=rv32gcb -mabi=ilp32" { target { rv32} } } */ + +long orlow(long x) { return x | ((1L << 24) - 1); } + +/* { dg-final { scan-assembler-times "orn\t" 1 } } */ +/* { dg-final { scan-assembler-not "addi\t" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/pr118410-2.c b/gcc/testsuite/gcc.target/riscv/pr118410-2.c new file mode 100644 index 0000000..b63a1d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr118410-2.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ +/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64} } } */ +/* { dg-options "-march=rv32gcb -mabi=ilp32" { target { rv32} } } */ + +long xorlow(long x) { return x ^ ((1L << 24) - 1); } + +/* { dg-final { scan-assembler-times "xnor\t" 1 } } */ +/* { dg-final { scan-assembler-not "addi\t" } } */ diff --git a/gcc/testsuite/gcc.target/sh/pr111814.c b/gcc/testsuite/gcc.target/sh/pr111814.c new file mode 100644 index 0000000..a88e5d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/sh/pr111814.c @@ -0,0 +1,7 @@ +/* Verify that __builtin_nan("") produces a constant matches + architecture specification. */ +/* { dg-do compile } */ + +double d = __builtin_nan (""); + +/* { dg-final { scan-assembler "\t.long\t-1\n\t.long\t2146959359\n" } } */ diff --git a/gcc/testsuite/gfortran.dg/do_concurrent_all_clauses.f90 b/gcc/testsuite/gfortran.dg/do_concurrent_all_clauses.f90 index 0c8a6ad..a7fa7c3 100644 --- a/gcc/testsuite/gfortran.dg/do_concurrent_all_clauses.f90 +++ b/gcc/testsuite/gfortran.dg/do_concurrent_all_clauses.f90 @@ -18,7 +18,7 @@ program do_concurrent_all_clauses squared = i * i arr(i) = temp2 + squared sum = sum + arr(i) - max_val = max(max_val, arr(i)) ! { dg-error "Reference to impure function" } + max_val = max(max_val, arr(i)) end block end do print *, arr, sum, max_val diff --git a/gcc/testsuite/gfortran.dg/pr119836_1.f90 b/gcc/testsuite/gfortran.dg/pr119836_1.f90 new file mode 100644 index 0000000..984e2d0 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr119836_1.f90 @@ -0,0 +1,18 @@ +! +! { dg-do run } +! +! PR fortran/119836 +! +program p + implicit none + integer, parameter :: n = 4 + integer :: i + integer :: y(n), x(n) + do concurrent (i=1:n) + x(i) = shiftl (i,1) ! accepted + block + y(i) = shiftl (i,1) ! wrongly rejected + end block + end do + if (any(x /= y)) stop 1 +end program p diff --git a/gcc/testsuite/gfortran.dg/pr119836_2.f90 b/gcc/testsuite/gfortran.dg/pr119836_2.f90 new file mode 100644 index 0000000..5e2d0c9 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr119836_2.f90 @@ -0,0 +1,21 @@ +! +! { dg-do compile } +! +! PR fortran/119836 +! +! Although intrinsic functions contained within the Fortran standard +! are pure procedures, many of the additional intrinsic functions +! supplied in libgfortran are impure. RAND() is one such function. +! +program foo + implicit none + integer i + real x(4) + do concurrent (i=1:4) + x = rand() ! { dg-error "Reference to impure function" } + block + x = rand() ! { dg-error "Reference to impure function" } + end block + end do + print *, x +end program foo diff --git a/gcc/testsuite/gfortran.dg/pr119836_3.f90 b/gcc/testsuite/gfortran.dg/pr119836_3.f90 new file mode 100644 index 0000000..69a5fcf --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr119836_3.f90 @@ -0,0 +1,30 @@ +! +! { dg-do run } +! +! PR fortran/119836 +! +program p + implicit none + integer, parameter :: n = 4 + integer :: i + integer :: y(n), x(n) + x = [(i,i=1,n)] + do concurrent (i=1:n) + call bar(x, y) + end do + if (any(x /= y)) stop 1 + x = 2 * x + do concurrent (i=1:n) + block + call bar(x, y) + end block + end do + if (any(x /= y)) stop 1 + + contains + elemental subroutine bar(x, y) + integer, intent(in) :: x + integer, intent(out) :: y + y = x + end subroutine +end program p diff --git a/gcc/testsuite/gfortran.dg/pr119836_4.f90 b/gcc/testsuite/gfortran.dg/pr119836_4.f90 new file mode 100644 index 0000000..dc6f72b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr119836_4.f90 @@ -0,0 +1,30 @@ +! +! { dg-do compile } +! +! PR fortran/119836 +! +program p + implicit none + integer, parameter :: n = 4 + integer :: i + integer :: y(n), x(n) + x = [(i,i=1,n)] + do concurrent (i=1:n) + call bar(x, y) ! { dg-error "Subroutine call" } + end do + if (any(x /= y)) stop 1 + x = 2 * x + do concurrent (i=1:n) + block + call bar(x, y) ! { dg-error "Subroutine call" } + end block + end do + if (any(x /= y)) stop 1 + + contains + subroutine bar(x, y) + integer, intent(in) :: x(:) + integer, intent(out) :: y(:) + y = x + end subroutine +end program p diff --git a/gcc/testsuite/rust/compile/nr2/compile.exp b/gcc/testsuite/rust/compile/nr2/compile.exp index 4d91dd0..9e15cdd 100644 --- a/gcc/testsuite/rust/compile/nr2/compile.exp +++ b/gcc/testsuite/rust/compile/nr2/compile.exp @@ -19,6 +19,15 @@ # Load support procs. load_lib rust-dg.exp +# These tests don't run runtest_file_p consistently if it +# doesn't return the same values, so disable parallelization +# of this *.exp file. The first parallel runtest to reach +# this will run all the tests serially. +if ![gcc_parallel_test_run_p compile] { + return +} +gcc_parallel_test_enable 0 + # Initialize `dg'. dg-init @@ -136,3 +145,5 @@ namespace eval rust-nr2-ns { # All done. dg-finish + +gcc_parallel_test_enable 1 @@ -2395,11 +2395,11 @@ public: array_slice (vec<OtherT, A, vl_embed> *v) : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {} - iterator begin () { return m_base; } - iterator end () { return m_base + m_size; } + iterator begin () { gcc_checking_assert (is_valid ()); return m_base; } + iterator end () { gcc_checking_assert (is_valid ()); return m_base + m_size; } - const_iterator begin () const { return m_base; } - const_iterator end () const { return m_base + m_size; } + const_iterator begin () const { gcc_checking_assert (is_valid ()); return m_base; } + const_iterator end () const { gcc_checking_assert (is_valid ()); return m_base + m_size; } value_type &front (); value_type &back (); diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 946bf13..66feed5 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,37 @@ +2025-04-19 Jiaxun Yang <jiaxun.yang@flygoat.com> + + PR target/118257 + * config/sh/sfp-machine.h (_FPU_GETCW): Implement with builtin. + (_FPU_SETCW): Likewise. + (FP_EX_ENABLE_SHIFT): Derive from arch spec. + (FP_EX_CAUSE_SHIFT): Likewise. + (FP_RND_MASK): Likewise. + (FP_EX_INVALID): Likewise. + (FP_EX_DIVZERO): Likewise. + (FP_EX_ALL): Likewise. + (FP_EX_OVERFLOW): Likewise. + (FP_EX_UNDERFLOW): Likewise. + (FP_EX_INEXACT): Likewise. + (_FP_DECL_EX): Declear default FCSR value. + (FP_RND_NEAREST): Derive from arch spec. + (FP_RND_ZERO): Likewise. + (FP_INIT_ROUNDMODE): Likewise. + (FP_ROUNDMODE): Likewise. + (FP_TRAPPING_EXCEPTIONS): Likewise. + (FP_HANDLE_EXCEPTIONS): Implement with _FPU_SETCW. + +2025-04-19 Jiaxun Yang <jiaxun.yang@flygoat.com> + + PR target/111814 + * config/sh/sfp-machine.h (_FP_NANFRAC_B): Reverse signaling bit. + (_FP_NANFRAC_H): Likewise. + (_FP_NANFRAC_S): Likewise. + (_FP_NANFRAC_D): Likewise. + (_FP_NANFRAC_Q): Likewise. + (_FP_KEEPNANFRACP): Enable for target. + (_FP_QNANNEGATEDP): Enable for target. + (_FP_CHOOSENAN): Port from MIPS. + 2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> PR target/118794 diff --git a/libgcc/config/sh/sfp-machine.h b/libgcc/config/sh/sfp-machine.h index 66984d4..8030c80 100644 --- a/libgcc/config/sh/sfp-machine.h +++ b/libgcc/config/sh/sfp-machine.h @@ -39,11 +39,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) #define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) -#define _FP_NANFRAC_B _FP_QNANBIT_B -#define _FP_NANFRAC_H _FP_QNANBIT_H -#define _FP_NANFRAC_S _FP_QNANBIT_S -#define _FP_NANFRAC_D _FP_QNANBIT_D, 0 -#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0 +#define _FP_NANFRAC_B (_FP_QNANBIT_B - 1) +#define _FP_NANFRAC_H (_FP_QNANBIT_H - 1) +#define _FP_NANFRAC_S (_FP_QNANBIT_S - 1) +#define _FP_NANFRAC_D (_FP_QNANBIT_D - 1), -1 +#define _FP_NANFRAC_Q (_FP_QNANBIT_Q - 1), -1, -1, -1 /* The type of the result of a floating point comparison. This must match __libgcc_cmp_return__ in GCC for the target. */ @@ -56,15 +56,71 @@ typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); #define _FP_NANSIGN_D 0 #define _FP_NANSIGN_Q 0 -#define _FP_KEEPNANFRACP 0 -#define _FP_QNANNEGATEDP 0 +#define _FP_KEEPNANFRACP 1 +#define _FP_QNANNEGATEDP 1 + +/* X is chosen unless one of the NaNs is sNaN. */ +# define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) | \ + _FP_FRAC_HIGH_RAW_##fs(Y)) & _FP_QNANBIT_##fs) \ + { \ + R##_s = _FP_NANSIGN_##fs; \ + _FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs); \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#ifdef __SH_FPU_ANY__ +#define _FPU_GETCW(fpscr) fpscr = __builtin_sh_get_fpscr () +#define _FPU_SETCW(fpscr) __builtin_sh_set_fpscr (fpscr) +#define FP_EX_ENABLE_SHIFT 5 +#define FP_EX_CAUSE_SHIFT 10 -#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ - do { \ - R##_s = _FP_NANSIGN_##fs; \ - _FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs); \ - R##_c = FP_CLS_NAN; \ +#define FP_EX_INVALID 0x0040 +#define FP_EX_DIVZERO 0x0020 +#if defined (__SH2E__) +#define FP_EX_ALL (FP_EX_DIVZERO | FP_EX_INVALID) +#else +#define FP_EX_OVERFLOW 0x0010 +#define FP_EX_UNDERFLOW 0x0008 +#define FP_EX_INEXACT 0x0004 +#define FP_EX_ALL (FP_EX_DIVZERO | FP_EX_INEXACT | \ + FP_EX_INVALID | FP_EX_OVERFLOW | FP_EX_UNDERFLOW) +#endif +#define _FP_DECL_EX \ + unsigned int _fcsr __attribute__ ((unused)) = FP_RND_NEAREST +/* Rounding modes. */ +#define FP_RND_NEAREST 0x0 +#define FP_RND_ZERO 0x1 +/* Placeholder, hardware does not have PINF/MINF modes. */ +#define FP_RND_PINF 0x2 +#define FP_RND_MINF 0x3 +#define FP_RND_MASK 3 + +#define FP_INIT_ROUNDMODE _FPU_GETCW (_fcsr) +#define FP_ROUNDMODE (_fcsr & FP_RND_MASK) +#define FP_TRAPPING_EXCEPTIONS ((_fcsr >> FP_EX_ENABLE_SHIFT) & FP_EX_ALL) +#define FP_HANDLE_EXCEPTIONS \ + do { \ + _fcsr &= ~(FP_EX_ALL << FP_EX_CAUSE_SHIFT); \ + _fcsr |= _fex | (_fex << FP_EX_CAUSE_SHIFT); \ + _FPU_SETCW (_fcsr); \ } while (0) +#else +#define FP_EX_INVALID (1 << 4) +#define FP_EX_DIVZERO (1 << 3) +#if !defined (__SH2E__) +#define FP_EX_OVERFLOW (1 << 2) +#define FP_EX_UNDERFLOW (1 << 1) +#define FP_EX_INEXACT (1 << 0) +#endif +#endif #define _FP_TININESS_AFTER_ROUNDING 1 diff --git a/libgcobol/ChangeLog b/libgcobol/ChangeLog index 6a0e961..9de1714 100644 --- a/libgcobol/ChangeLog +++ b/libgcobol/ChangeLog @@ -1,3 +1,10 @@ +2025-04-21 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> + + * configure.ac: Check for struct tm.tm_zone. + * configure, config.h.in: Regenerate. + * intrinsic.cc (__gg__formatted_current_date): Guard tm.tm_zone + use with HAVE_STRUCT_TM_TM_ZONE. + 2025-04-15 Andreas Schwab <schwab@suse.de> * configure.tgt: Set LIBGCOBOL_SUPPORTED for riscv64-*-linux* with diff --git a/libgcobol/config.h.in b/libgcobol/config.h.in index 6a53279..fdf5e3e 100644 --- a/libgcobol/config.h.in +++ b/libgcobol/config.h.in @@ -72,6 +72,9 @@ /* Define to 1 if you have the `strtof128' function. */ #undef HAVE_STRTOF128 +/* Define to 1 if `tm_zone' is a member of `struct tm'. */ +#undef HAVE_STRUCT_TM_TM_ZONE + /* Define to 1 if you have the <sys/stat.h> header file. */ #undef HAVE_SYS_STAT_H diff --git a/libgcobol/configure b/libgcobol/configure index e83119d..6821591 100755 --- a/libgcobol/configure +++ b/libgcobol/configure @@ -2449,6 +2449,63 @@ $as_echo "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_cxx_check_func + +# ac_fn_cxx_check_member LINENO AGGR MEMBER VAR INCLUDES +# ------------------------------------------------------ +# Tries to find if the field MEMBER exists in type AGGR, after including +# INCLUDES, setting cache variable VAR accordingly. +ac_fn_cxx_check_member () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2.$3" >&5 +$as_echo_n "checking for $2.$3... " >&6; } +if eval \${$4+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$5 +int +main () +{ +static $2 ac_aggr; +if (ac_aggr.$3) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + eval "$4=yes" +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$5 +int +main () +{ +static $2 ac_aggr; +if (sizeof ac_aggr.$3) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + eval "$4=yes" +else + eval "$4=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$4 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_cxx_check_member cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. @@ -11693,7 +11750,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11696 "configure" +#line 11753 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -11799,7 +11856,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11802 "configure" +#line 11859 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -17434,6 +17491,19 @@ $as_echo "#define USE_IEC_60559 1" >>confdefs.h +# struct tm tm_zone is a POSIX.1-2024 addition. +ac_fn_cxx_check_member "$LINENO" "struct tm" "tm_zone" "ac_cv_member_struct_tm_tm_zone" "#include <time.h> +" +if test "x$ac_cv_member_struct_tm_tm_zone" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_TM_TM_ZONE 1 +_ACEOF + + +fi + + if test "${multilib}" = "yes"; then multilib_arg="--enable-multilib" else diff --git a/libgcobol/configure.ac b/libgcobol/configure.ac index a1e9513..4bb6905 100644 --- a/libgcobol/configure.ac +++ b/libgcobol/configure.ac @@ -231,6 +231,9 @@ elif test "${ENABLE_LIBQUADMATH_SUPPORT}" = "default" ; then fi LIBGCOBOL_CHECK_FLOAT128 +# struct tm tm_zone is a POSIX.1-2024 addition. +AC_CHECK_MEMBERS([struct tm.tm_zone],,,[#include <time.h>]) + if test "${multilib}" = "yes"; then multilib_arg="--enable-multilib" else diff --git a/libgcobol/intrinsic.cc b/libgcobol/intrinsic.cc index 181b053..97f2bdc 100644 --- a/libgcobol/intrinsic.cc +++ b/libgcobol/intrinsic.cc @@ -1482,7 +1482,9 @@ __gg__formatted_current_date( cblc_field_t *dest, // Destination string __gg__clock_gettime(CLOCK_REALTIME, &ts); struct tm tm = {}; +#ifdef HAVE_STRUCT_TM_TM_ZONE tm.tm_zone = "GMT"; +#endif if( is_zulu ) { gmtime_r(&ts.tv_sec, &tm); |