aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog71
-rw-r--r--gcc/DATESTAMP2
-rw-r--r--gcc/combine.cc6
-rw-r--r--gcc/config/alpha/alpha.cc23
-rw-r--r--gcc/config/i386/i386-expand.cc44
-rw-r--r--gcc/config/i386/i386.cc57
-rw-r--r--gcc/config/i386/i386.h6
-rw-r--r--gcc/config/i386/x86-tune-costs.h121
-rw-r--r--gcc/config/mips/mips.cc3
-rw-r--r--gcc/config/riscv/bitmanip.md38
-rw-r--r--gcc/config/riscv/riscv.cc3
-rw-r--r--gcc/config/riscv/vector.md22
-rw-r--r--gcc/config/sh/sh-modes.def6
-rw-r--r--gcc/cp/ChangeLog28
-rw-r--r--gcc/cp/constexpr.cc63
-rw-r--r--gcc/cp/coroutines.cc21
-rw-r--r--gcc/cp/init.cc4
-rw-r--r--gcc/cp/semantics.cc8
-rw-r--r--gcc/except.cc9
-rw-r--r--gcc/fortran/ChangeLog7
-rw-r--r--gcc/fortran/resolve.cc49
-rw-r--r--gcc/testsuite/ChangeLog75
-rw-r--r--gcc/testsuite/g++.dg/cpp2a/constexpr-new24.C4
-rw-r--r--gcc/testsuite/g++.dg/eh/pr119507.C19
-rw-r--r--gcc/testsuite/gcc.dg/memcpy-4.c7
-rw-r--r--gcc/testsuite/gcc.dg/pr118947-1.c4
-rw-r--r--gcc/testsuite/gcc.dg/pr78408-3.c4
-rw-r--r--gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c71
-rw-r--r--gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c74
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/bic-1.c40
-rw-r--r--gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-long.c76
-rw-r--r--gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-quad.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c12
-rw-r--r--gcc/testsuite/gcc.target/mips/clear-cache-1.c2
-rw-r--r--gcc/testsuite/gcc.target/mips/memcpy-2.c12
-rw-r--r--gcc/testsuite/gcc.target/powerpc/power11-3.c1
-rw-r--r--gcc/testsuite/gcc.target/riscv/pr118410-1.c9
-rw-r--r--gcc/testsuite/gcc.target/riscv/pr118410-2.c9
-rw-r--r--gcc/testsuite/gcc.target/sh/pr111814.c7
-rw-r--r--gcc/testsuite/gfortran.dg/do_concurrent_all_clauses.f902
-rw-r--r--gcc/testsuite/gfortran.dg/pr119836_1.f9018
-rw-r--r--gcc/testsuite/gfortran.dg/pr119836_2.f9021
-rw-r--r--gcc/testsuite/gfortran.dg/pr119836_3.f9030
-rw-r--r--gcc/testsuite/gfortran.dg/pr119836_4.f9030
-rw-r--r--gcc/testsuite/rust/compile/nr2/compile.exp11
-rw-r--r--gcc/vec.h8
-rw-r--r--libgcc/ChangeLog34
-rw-r--r--libgcc/config/sh/sfp-machine.h80
-rw-r--r--libgcobol/ChangeLog7
-rw-r--r--libgcobol/config.h.in3
-rwxr-xr-xlibgcobol/configure74
-rw-r--r--libgcobol/configure.ac3
-rw-r--r--libgcobol/intrinsic.cc2
54 files changed, 1272 insertions, 134 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 264bbd2..5b54c5a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,74 @@
+2025-04-21 Jan Hubicka <hubicka@ucw.cz>
+
+ PR target/119879
+ * config/i386/i386.cc (fp_conversion_stmt_cost): Inline to ...
+ (ix86_vector_costs::add_stmt_cost): ... here; fix handling of NOP_EXPR.
+
+2025-04-21 Matthew Fortune <matthew.fortune@imgtec.com>
+
+ * config/mips/mips.cc (mips_option_override): Error out for
+ -mmicromips -mmsa.
+
+2025-04-21 Andrew Pinski <quic_apinski@quicinc.com>
+
+ PR middle-end/119507
+ * except.cc (switch_to_exception_section): Don't use the cached section if
+ the current function is in comdat.
+
+2025-04-21 Andrew Pinski <quic_apinski@quicinc.com>
+
+ * vec.h (array_slice::begin): Assert that the
+ slice is valid.
+ (array_slice::end): Likewise.
+
+2025-04-21 hongtao.liu <hongtao.liu@intel.com>
+
+ * config/i386/i386-expand.cc (ix86_emit_swdivsf): Generate 2
+ FMA instructions when TARGET_FMA.
+
+2025-04-19 Jeff Law <jlaw@ventanamicro.com>
+
+ PR target/119865
+ * config/riscv/riscv.cc (parse_features_for_version): Do not
+ explicitly free the architecture string.
+
+2025-04-19 Jeff Law <jlaw@ventanamicro.com>
+
+ PR target/118410
+ * config/riscv/bitmanip.md (logical with constant argument): New
+ splitter for cases where synthesizing ~C is cheaper than synthesizing
+ the original constant C.
+
+2025-04-19 Jan Hubicka <hubicka@ucw.cz>
+
+ * config/i386/i386.cc (vec_fp_conversion_cost): New function.
+ (ix86_rtx_costs): Use it for SSE/AVX FP conversoins.
+ (ix86_builtin_vectorization_cost): Fix indentation;
+ and use vec_fp_conversion_cost in vec_promote_demote.
+ (fp_conversion_stmt_cost): New function.
+ (ix86_vector_costs::add_stmt_cost): Use it to cost NOP_EXPR
+ and vec_promote_demote.
+ * config/i386/i386.h (struct processor_costs):
+ * config/i386/x86-tune-costs.h (struct processor_costs):
+
+2025-04-19 Andrew Pinski <quic_apinski@quicinc.com>
+
+ PR rtl-optimization/111949
+ * combine.cc (find_split_point): Add a split point
+ for `(and (not X) Y)` if not in the outer set already.
+
+2025-04-19 Jiaxun Yang <jiaxun.yang@flygoat.com>
+
+ PR target/111814
+ * config/sh/sh-modes.def (RESET_FLOAT_FORMAT): Use mips format.
+ (FLOAT_MODE): Use mips mode.
+
+2025-04-19 Maciej W. Rozycki <macro@orcam.me.uk>
+
+ * config/alpha/alpha.cc
+ (alpha_get_mem_rtx_alignment_and_offset): Recurse into
+ COMPONENT_REF nodes.
+
2025-04-18 Jeff Law <jlaw@ventanamicro.com>
* config/riscv/bitmanip.md (*bext<mode>_mask_pos): New pattern
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index f0d1b43..fa0255d 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20250419
+20250422
diff --git a/gcc/combine.cc b/gcc/combine.cc
index e118608..873c2bd 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -5280,6 +5280,12 @@ find_split_point (rtx *loc, rtx_insn *insn, bool set_src)
SUBST (XEXP (x, 0), XEXP (x, 1));
SUBST (XEXP (x, 1), tem);
}
+ /* Many targets have a `(and (not X) Y)` and/or `(ior (not X) Y)` instructions.
+ Split at that insns. However if this is
+ the SET_SRC, we likely do not have such an instruction and it's
+ worthless to try this split. */
+ if (!set_src && GET_CODE (XEXP (x, 0)) == NOT)
+ return loc;
break;
case PLUS:
diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
index ba470d9..14e7da5 100644
--- a/gcc/config/alpha/alpha.cc
+++ b/gcc/config/alpha/alpha.cc
@@ -4291,14 +4291,10 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
tree mem = MEM_EXPR (expr);
if (mem != NULL_TREE)
- switch (TREE_CODE (mem))
- {
- case MEM_REF:
- tree_offset = mem_ref_offset (mem).force_shwi ();
- tree_align = get_object_alignment (get_base_address (mem));
- break;
+ {
+ HOST_WIDE_INT comp_offset = 0;
- case COMPONENT_REF:
+ for (; TREE_CODE (mem) == COMPONENT_REF; mem = TREE_OPERAND (mem, 0))
{
tree byte_offset = component_ref_field_offset (mem);
tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1));
@@ -4307,14 +4303,15 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
|| !poly_int_tree_p (byte_offset, &offset)
|| !tree_fits_shwi_p (bit_offset))
break;
- tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
+ comp_offset += offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
}
- tree_align = get_object_alignment (get_base_address (mem));
- break;
- default:
- break;
- }
+ if (TREE_CODE (mem) == MEM_REF)
+ {
+ tree_offset = comp_offset + mem_ref_offset (mem).force_shwi ();
+ tree_align = get_object_alignment (get_base_address (mem));
+ }
+ }
if (reg_align > mem_align)
{
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index cdfd94d..36f71eb 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -19256,8 +19256,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
e1 = gen_reg_rtx (mode);
x1 = gen_reg_rtx (mode);
- /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
-
b = force_reg (mode, b);
/* x0 = rcp(b) estimate */
@@ -19270,20 +19268,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
UNSPEC_RCP)));
- /* e0 = x0 * b */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
+ unsigned vector_size = GET_MODE_SIZE (mode);
+
+ /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a
+ N-R step with 2 fma implementation. */
+ if (TARGET_FMA
+ || (TARGET_AVX512F && vector_size == 64)
+ || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
+ {
+ /* e0 = x0 * a */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
+ /* e1 = e0 * b - a */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b,
+ gen_rtx_NEG (mode, a))));
+ /* res = - e1 * x0 + e0 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode,
+ gen_rtx_NEG (mode, e1),
+ x0, e0)));
+ }
+ else
+ /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
+ {
+ /* e0 = x0 * b */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
- /* e0 = x0 * e0 */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
+ /* e1 = x0 + x0 */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
- /* e1 = x0 + x0 */
- emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
+ /* e0 = x0 * e0 */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
- /* x1 = e1 - e0 */
- emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
+ /* x1 = e1 - e0 */
+ emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
- /* res = a * x1 */
- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ /* res = a * x1 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ }
}
/* Output code to perform a Newton-Rhapson approximation of a
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 38df84f..d15f91d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -100,6 +100,7 @@ along with GCC; see the file COPYING3. If not see
#include "i386-features.h"
#include "function-abi.h"
#include "rtl-error.h"
+#include "gimple-pretty-print.h"
/* This file should be included last. */
#include "target-def.h"
@@ -21816,6 +21817,25 @@ ix86_insn_cost (rtx_insn *insn, bool speed)
return insn_cost + pattern_cost (PATTERN (insn), speed);
}
+/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */
+
+static int
+vec_fp_conversion_cost (const struct processor_costs *cost, int size)
+{
+ if (size < 128)
+ return cost->cvtss2sd;
+ else if (size < 256)
+ {
+ if (TARGET_SSE_SPLIT_REGS)
+ return cost->cvtss2sd * size / 64;
+ return cost->cvtss2sd;
+ }
+ if (size < 512)
+ return cost->vcvtps2pd256;
+ else
+ return cost->vcvtps2pd512;
+}
+
/* Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
scanned. In either case, *TOTAL contains the cost result. */
@@ -22479,17 +22499,18 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
return false;
case FLOAT_EXTEND:
+ /* x87 represents all values extended to 80bit. */
if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
*total = 0;
else
- *total = ix86_vec_cost (mode, cost->addss);
+ *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
return false;
case FLOAT_TRUNCATE:
if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
*total = cost->fadd;
else
- *total = ix86_vec_cost (mode, cost->addss);
+ *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
return false;
case ABS:
@@ -24683,7 +24704,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
switch (type_of_cost)
{
case scalar_stmt:
- return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
+ return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
case scalar_load:
/* load/store costs are relative to register move which is 2. Recompute
@@ -24754,7 +24775,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
return ix86_cost->cond_not_taken_branch_cost;
case vec_perm:
+ return ix86_vec_cost (mode, ix86_cost->sse_op);
+
case vec_promote_demote:
+ if (fp)
+ return vec_fp_conversion_cost (ix86_tune_cost, mode);
return ix86_vec_cost (mode, ix86_cost->sse_op);
case vec_construct:
@@ -25342,6 +25367,9 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
(TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
stmt_cost = 0;
+ else if (fp)
+ stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
break;
case BIT_IOR_EXPR:
@@ -25383,6 +25411,29 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
break;
}
+ if (kind == vec_promote_demote
+ && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
+ {
+ int outer_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
+ int inner_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
+ int stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end
+ up doing two conversions and packing them. */
+ if (inner_size > outer_size)
+ {
+ int n = inner_size / outer_size;
+ stmt_cost = stmt_cost * n
+ + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ }
+
/* If we do elementwise loads into a vector then we are bound by
latency and execution resources for the many scalar loads
(AGU and load ports). Try to account for this by scaling the
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8507243..18aa42d 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -207,6 +207,12 @@ struct processor_costs {
const int divsd; /* cost of DIVSD instructions. */
const int sqrtss; /* cost of SQRTSS instructions. */
const int sqrtsd; /* cost of SQRTSD instructions. */
+ const int cvtss2sd; /* cost SSE FP conversions,
+ such as CVTSS2SD. */
+ const int vcvtps2pd256; /* cost 256bit packed FP conversions,
+ such as VCVTPD2PS with larger reg in ymm. */
+ const int vcvtps2pd512; /* cost 512bit packed FP conversions,
+ such as VCVTPD2PS with larger reg in zmm. */
const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
/* Specify reassociation width for integer,
fp, vector integer and vector fp
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 9477345..cddcf61 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -121,16 +121,19 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
COSTS_N_BYTES (2), /* cost of FCHS instruction. */
COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
- COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */
- COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */
- COSTS_N_BYTES (2), /* cost of MULSS instruction. */
- COSTS_N_BYTES (2), /* cost of MULSD instruction. */
- COSTS_N_BYTES (2), /* cost of FMA SS instruction. */
- COSTS_N_BYTES (2), /* cost of FMA SD instruction. */
- COSTS_N_BYTES (2), /* cost of DIVSS instruction. */
- COSTS_N_BYTES (2), /* cost of DIVSD instruction. */
- COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */
- COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */
+ COSTS_N_BYTES (4), /* cost of cheap SSE instruction. */
+ COSTS_N_BYTES (4), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_BYTES (4), /* cost of MULSS instruction. */
+ COSTS_N_BYTES (4), /* cost of MULSD instruction. */
+ COSTS_N_BYTES (4), /* cost of FMA SS instruction. */
+ COSTS_N_BYTES (4), /* cost of FMA SD instruction. */
+ COSTS_N_BYTES (4), /* cost of DIVSS instruction. */
+ COSTS_N_BYTES (4), /* cost of DIVSD instruction. */
+ COSTS_N_BYTES (4), /* cost of SQRTSS instruction. */
+ COSTS_N_BYTES (4), /* cost of SQRTSD instruction. */
+ COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
ix86_size_memcpy,
ix86_size_memset,
@@ -243,6 +246,9 @@ struct processor_costs i386_cost = { /* 386 specific costs */
COSTS_N_INSNS (88), /* cost of DIVSD instruction. */
COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i386_memcpy,
i386_memset,
@@ -356,6 +362,9 @@ struct processor_costs i486_cost = { /* 486 specific costs */
COSTS_N_INSNS (74), /* cost of DIVSD instruction. */
COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i486_memcpy,
i486_memset,
@@ -467,6 +476,9 @@ struct processor_costs pentium_cost = {
COSTS_N_INSNS (39), /* cost of DIVSD instruction. */
COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -571,6 +583,9 @@ struct processor_costs lakemont_cost = {
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -690,6 +705,9 @@ struct processor_costs pentiumpro_cost = {
COSTS_N_INSNS (18), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentiumpro_memcpy,
pentiumpro_memset,
@@ -800,6 +818,9 @@ struct processor_costs geode_cost = {
COSTS_N_INSNS (47), /* cost of DIVSD instruction. */
COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
geode_memcpy,
geode_memset,
@@ -913,6 +934,9 @@ struct processor_costs k6_cost = {
COSTS_N_INSNS (56), /* cost of DIVSD instruction. */
COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k6_memcpy,
k6_memset,
@@ -1027,6 +1051,9 @@ struct processor_costs athlon_cost = {
COSTS_N_INSNS (24), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
athlon_memcpy,
athlon_memset,
@@ -1150,6 +1177,9 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k8_memcpy,
k8_memset,
@@ -1281,6 +1311,9 @@ struct processor_costs amdfam10_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
amdfam10_memcpy,
amdfam10_memset,
@@ -1405,6 +1438,9 @@ const struct processor_costs bdver_cost = {
COSTS_N_INSNS (27), /* cost of DIVSD instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver_memcpy,
bdver_memset,
@@ -1553,6 +1589,10 @@ struct processor_costs znver1_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ /* Real latency is 4, but for split regs multiply cost of half op by 2. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
/* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
and it can execute 2 integer additions and 2 multiplications thus
reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests
@@ -1712,6 +1752,9 @@ struct processor_costs znver2_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to with of 6.
@@ -1847,6 +1890,9 @@ struct processor_costs znver3_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to with of 6.
@@ -1984,6 +2030,10 @@ struct processor_costs znver4_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ /* Real latency is 6, but for split regs multiply cost of half op by 2. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to with of 6.
@@ -2135,6 +2185,9 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
/* DIVSD has throughtput 0.13 and latency 20. */
COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
/* Zen5 can execute:
- integer ops: 6 per cycle, at most 3 multiplications.
latency 1 for additions, 3 for multiplications (pipelined)
@@ -2274,6 +2327,9 @@ struct processor_costs skylake_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
skylake_memcpy,
skylake_memset,
@@ -2403,6 +2459,9 @@ struct processor_costs icelake_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
icelake_memcpy,
icelake_memset,
@@ -2526,6 +2585,9 @@ struct processor_costs alderlake_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
alderlake_memcpy,
alderlake_memset,
@@ -2642,6 +2704,9 @@ const struct processor_costs btver1_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver1_memcpy,
btver1_memset,
@@ -2755,6 +2820,9 @@ const struct processor_costs btver2_cost = {
COSTS_N_INSNS (19), /* cost of DIVSD instruction. */
COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver2_memcpy,
btver2_memset,
@@ -2867,6 +2935,9 @@ struct processor_costs pentium4_cost = {
COSTS_N_INSNS (38), /* cost of DIVSD instruction. */
COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium4_memcpy,
pentium4_memset,
@@ -2982,6 +3053,9 @@ struct processor_costs nocona_cost = {
COSTS_N_INSNS (40), /* cost of DIVSD instruction. */
COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
nocona_memcpy,
nocona_memset,
@@ -3095,6 +3169,9 @@ struct processor_costs atom_cost = {
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
atom_memcpy,
atom_memset,
@@ -3208,6 +3285,9 @@ struct processor_costs slm_cost = {
COSTS_N_INSNS (69), /* cost of DIVSD instruction. */
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
slm_memcpy,
slm_memset,
@@ -3335,6 +3415,9 @@ struct processor_costs tremont_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
tremont_memcpy,
tremont_memset,
@@ -3448,6 +3531,9 @@ struct processor_costs intel_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
intel_memcpy,
intel_memset,
@@ -3566,6 +3652,9 @@ struct processor_costs lujiazui_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (60), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
lujiazui_memcpy,
lujiazui_memset,
@@ -3682,6 +3771,9 @@ struct processor_costs yongfeng_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
yongfeng_memcpy,
yongfeng_memset,
@@ -3798,6 +3890,9 @@ struct processor_costs shijidadao_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (11), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
shijidadao_memcpy,
shijidadao_memset,
@@ -3922,6 +4017,9 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
generic_memcpy,
generic_memset,
@@ -4051,6 +4149,9 @@ struct processor_costs core_cost = {
COSTS_N_INSNS (32), /* cost of DIVSD instruction. */
COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
core_memcpy,
core_memset,
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 24a28dc..0d3d026 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -20678,6 +20678,9 @@ mips_option_override (void)
"-mcompact-branches=never");
}
+ if (is_micromips && TARGET_MSA)
+ error ("unsupported combination: %s", "-mmicromips -mmsa");
+
/* Require explicit relocs for MIPS R6 onwards. This enables simplification
of the compact branch and jump support through the backend. */
if (!TARGET_EXPLICIT_RELOCS && mips_isa_rev >= 6)
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 2a3884c..d0919ec 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -1263,3 +1263,41 @@
expand_crc_using_clmul (<SUBX:MODE>mode, <SUBX1:MODE>mode, operands);
DONE;
})
+
+;; If we have an XOR/IOR with a constant operand (C) and the we can
+;; synthesize ~C more efficiently than C, then synthesize ~C and use
+;; xnor/orn instead.
+;;
+;; The same can be done for AND, but mvconst_internal's issues get in
+;; the way. That's future work.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (any_or:X (match_operand:X 1 "register_operand")
+ (match_operand:X 2 "const_int_operand")))
+ (clobber (match_operand:X 3 "register_operand"))]
+ "TARGET_ZBB
+ && (riscv_const_insns (operands[2], true)
+ > riscv_const_insns (GEN_INT (~INTVAL (operands[2])), true))"
+ [(const_int 0)]
+{
+ /* Get the inverted constant into the temporary register. */
+ riscv_emit_move (operands[3], GEN_INT (~INTVAL (operands[2])));
+
+ /* For xnor, the NOT operation is in a different position. So
+ we have to customize the split code we generate a bit.
+
+ It is expected that AND will be handled like IOR in the future. */
+ if (<CODE> == XOR)
+ {
+ rtx x = gen_rtx_XOR (<X:MODE>mode, operands[1], operands[3]);
+ x = gen_rtx_NOT (<X:MODE>mode, x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+ else
+ {
+ rtx x = gen_rtx_NOT (<X:MODE>mode, operands[3]);
+ x = gen_rtx_IOR (<X:MODE>mode, x, operands[1]);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+ DONE;
+})
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d3656a7..bad59e2 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -13136,9 +13136,6 @@ parse_features_for_version (tree decl,
DECL_SOURCE_LOCATION (decl));
gcc_assert (parse_res);
- if (arch_string != default_opts->x_riscv_arch_string)
- free (CONST_CAST (void *, (const void *) arch_string));
-
cl_target_option_restore (&global_options, &global_options_set,
&cur_target);
}
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 51eb64f..3ab4d76 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2136,18 +2136,34 @@
(match_operand 7 "const_int_operand")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (vec_duplicate:V_VLS
- (match_operand:<VEL> 3 "direct_broadcast_operand"))
+ ;; (vec_duplicate:V_VLS ;; wrapper activated by wrap_vec_dup below.
+ (match_operand:<VEL> 3 "direct_broadcast_operand") ;; )
(match_operand:V_VLS 2 "vector_merge_operand")))]
"TARGET_VECTOR"
{
/* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f
has better chances to do vsetvl fusion in vsetvl pass. */
+ bool wrap_vec_dup = true;
+ rtx vec_cst = NULL_RTX;
if (riscv_vector::splat_to_scalar_move_p (operands))
{
operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
operands[3] = force_reg (<VEL>mode, operands[3]);
}
+ else if (immediate_operand (operands[3], <VEL>mode)
+ && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3]))
+ && (/* -> pred_broadcast<mode>_zero */
+ (vector_least_significant_set_mask_operand (operands[1],
+ <VM>mode)
+ && vector_const_0_operand (vec_cst, <MODE>mode))
+ || (/* pred_broadcast<mode>_imm */
+ vector_all_trues_mask_operand (operands[1], <VM>mode)
+ && vector_const_int_or_double_0_operand (vec_cst,
+ <MODE>mode))))
+ {
+ operands[3] = vec_cst;
+ wrap_vec_dup = false;
+ }
/* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar. */
else if (satisfies_constraint_Wdm (operands[3]))
{
@@ -2191,6 +2207,8 @@
;
else
operands[3] = force_reg (<VEL>mode, operands[3]);
+ if (wrap_vec_dup)
+ operands[3] = gen_rtx_VEC_DUPLICATE (<MODE>mode, operands[3]);
})
(define_insn_and_split "*pred_broadcast<mode>"
diff --git a/gcc/config/sh/sh-modes.def b/gcc/config/sh/sh-modes.def
index 80650b4..e31ae69 100644
--- a/gcc/config/sh/sh-modes.def
+++ b/gcc/config/sh/sh-modes.def
@@ -17,6 +17,12 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+/* SH has the same reversed quiet bit as MIPS. */
+RESET_FLOAT_FORMAT (SF, mips_single_format);
+RESET_FLOAT_FORMAT (DF, mips_double_format);
+/* TFmode: IEEE quad floating point (software). */
+FLOAT_MODE (TF, 16, mips_quad_format);
+
/* Vector modes. */
VECTOR_MODE (INT, QI, 2); /* V2QI */
VECTOR_MODES (INT, 4); /* V4QI V2HI */
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index 644b36a..e85a710 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,31 @@
+2025-04-21 Jason Merrill <jason@redhat.com>
+
+ * constexpr.cc (cxx_eval_outermost_constant_expr): Move
+ verify_constant later.
+
+2025-04-21 Jason Merrill <jason@redhat.com>
+
+ PR c++/118775
+ * constexpr.cc (cxx_eval_call_expression): Add assert.
+ (fold_to_constant): Handle processing_template_decl.
+ * init.cc (build_new_1): Use fold_to_constant.
+
+2025-04-21 Jason Merrill <jason@redhat.com>
+
+ PR c++/99456
+ * constexpr.cc (cxx_eval_constant_expression): Check strict
+ instead of manifestly_const_eval.
+ (maybe_constant_init_1): Be strict for static constexpr vars.
+
+2025-04-19 Jason Merrill <jason@redhat.com>
+
+ * coroutines.cc (coro_build_expr_stmt)
+ (coro_build_cvt_void_expr_stmt): Remove.
+ (build_actor_fn): Use finish_expr_stmt.
+ * semantics.cc (finish_expr_stmt): Avoid wrapping statement in
+ EXPR_STMT.
+ (finish_stmt_expr_expr): Add comment.
+
2025-04-17 Jason Merrill <jason@redhat.com>
* constexpr.cc (is_valid_constexpr_fn): Improve diagnostic.
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index f56c5c4..8a11e62 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -2956,12 +2956,11 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t,
gcc_assert (arg0);
if (new_op_p)
{
- /* FIXME: We should not get here; the VERIFY_CONSTANT above
- should have already caught it. But currently a conversion
- from pointer type to arithmetic type is only considered
- non-constant for CONVERT_EXPRs, not NOP_EXPRs. */
if (!tree_fits_uhwi_p (arg0))
{
+ /* We should not get here; the VERIFY_CONSTANT above
+ should have already caught it. */
+ gcc_checking_assert (false);
if (!ctx->quiet)
error_at (loc, "cannot allocate array: size not constant");
*non_constant_p = true;
@@ -8479,7 +8478,7 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t,
if (TREE_CODE (t) == CONVERT_EXPR
&& ARITHMETIC_TYPE_P (type)
&& INDIRECT_TYPE_P (TREE_TYPE (op))
- && ctx->manifestly_const_eval == mce_true)
+ && ctx->strict)
{
if (!ctx->quiet)
error_at (loc,
@@ -9228,11 +9227,6 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant,
if (r == void_node && !constexpr_dtor && ctx.ctor)
r = ctx.ctor;
- if (!constexpr_dtor)
- verify_constant (r, allow_non_constant, &non_constant_p, &overflow_p);
- else
- DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P (object) = true;
-
unsigned int i;
tree cleanup;
/* Evaluate the cleanups. */
@@ -9251,15 +9245,6 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant,
non_constant_p = true;
}
- if (TREE_CODE (r) == CONSTRUCTOR && CONSTRUCTOR_NO_CLEARING (r))
- {
- if (!allow_non_constant)
- error ("%qE is not a constant expression because it refers to "
- "an incompletely initialized variable", t);
- TREE_CONSTANT (r) = false;
- non_constant_p = true;
- }
-
if (!non_constant_p && cxx_dialect >= cxx20
&& !global_ctx.heap_vars.is_empty ())
{
@@ -9316,6 +9301,21 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant,
non_constant_p = true;
}
+ if (!non_constant_p && !constexpr_dtor)
+ verify_constant (r, allow_non_constant, &non_constant_p, &overflow_p);
+
+ /* After verify_constant because reduced_constant_expression_p can unset
+ CONSTRUCTOR_NO_CLEARING. */
+ if (!non_constant_p
+ && TREE_CODE (r) == CONSTRUCTOR && CONSTRUCTOR_NO_CLEARING (r))
+ {
+ if (!allow_non_constant)
+ error ("%qE is not a constant expression because it refers to "
+ "an incompletely initialized variable", t);
+ TREE_CONSTANT (r) = false;
+ non_constant_p = true;
+ }
+
if (non_constant_p)
/* If we saw something bad, go back to our argument. The wrapping below is
only for the cases of TREE_CONSTANT argument or overflow. */
@@ -9332,13 +9332,17 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant,
if (non_constant_p && !allow_non_constant)
return error_mark_node;
- else if (constexpr_dtor)
- return r;
else if (non_constant_p && TREE_CONSTANT (r))
r = mark_non_constant (r);
else if (non_constant_p)
return t;
+ if (constexpr_dtor)
+ {
+ DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P (object) = true;
+ return r;
+ }
+
/* Check we are not trying to return the wrong type. */
if (!same_type_ignoring_top_level_qualifiers_p (type, TREE_TYPE (r)))
{
@@ -9490,6 +9494,9 @@ fold_simple (tree t)
tree
fold_to_constant (tree t)
{
+ if (processing_template_decl)
+ return t;
+
tree r = fold (t);
if (CONSTANT_CLASS_P (r) && !TREE_OVERFLOW (r))
return r;
@@ -9747,16 +9754,26 @@ maybe_constant_init_1 (tree t, tree decl, bool allow_non_constant,
{
/* [basic.start.static] allows constant-initialization of variables with
static or thread storage duration even if it isn't required, but we
- shouldn't bend the rules the same way for automatic variables. */
+ shouldn't bend the rules the same way for automatic variables.
+
+ But still enforce the requirements of constexpr/constinit.
+ [dcl.constinit] "If a variable declared with the constinit specifier
+ has dynamic initialization, the program is ill-formed, even if the
+ implementation would perform that initialization as a static
+ initialization." */
bool is_static = (decl && DECL_P (decl)
&& (TREE_STATIC (decl) || DECL_EXTERNAL (decl)));
+ bool strict = (!is_static
+ || (decl && DECL_P (decl)
+ && (DECL_DECLARED_CONSTEXPR_P (decl)
+ || DECL_DECLARED_CONSTINIT_P (decl))));
if (is_static)
manifestly_const_eval = mce_true;
if (cp_unevaluated_operand && manifestly_const_eval != mce_true)
return fold_to_constant (t);
- t = cxx_eval_outermost_constant_expr (t, allow_non_constant, !is_static,
+ t = cxx_eval_outermost_constant_expr (t, allow_non_constant, strict,
manifestly_const_eval,
false, decl);
}
diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index b92d09f..743da06 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -1852,21 +1852,6 @@ coro_build_frame_access_expr (tree coro_ref, tree member_id, bool preserve_ref,
return expr;
}
-/* Helpers to build EXPR_STMT and void-cast EXPR_STMT, common ops. */
-
-static tree
-coro_build_expr_stmt (tree expr, location_t loc)
-{
- return maybe_cleanup_point_expr_void (build_stmt (loc, EXPR_STMT, expr));
-}
-
-static tree
-coro_build_cvt_void_expr_stmt (tree expr, location_t loc)
-{
- tree t = build1 (CONVERT_EXPR, void_type_node, expr);
- return coro_build_expr_stmt (t, loc);
-}
-
/* Helpers to build an artificial var, with location LOC, NAME and TYPE, in
CTX, and with initializer INIT. */
@@ -2582,8 +2567,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody,
tree hfa = build_new_method_call (ash, hfa_m, &args, NULL_TREE, LOOKUP_NORMAL,
NULL, tf_warning_or_error);
r = cp_build_init_expr (ash, hfa);
- r = coro_build_cvt_void_expr_stmt (r, loc);
- add_stmt (r);
+ finish_expr_stmt (r);
release_tree_vector (args);
/* Now we know the real promise, and enough about the frame layout to
@@ -2678,8 +2662,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody,
we must tail call them. However, some targets do not support indirect
tail calls to arbitrary callees. See PR94359. */
CALL_EXPR_TAILCALL (resume) = true;
- resume = coro_build_cvt_void_expr_stmt (resume, loc);
- add_stmt (resume);
+ finish_expr_stmt (resume);
r = build_stmt (loc, RETURN_EXPR, NULL);
gcc_checking_assert (maybe_cleanup_point_expr_void (r) == r);
diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index e589e45..062a493 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -3405,7 +3405,7 @@ build_new_1 (vec<tree, va_gc> **placement, tree type, tree nelts,
errval = throw_bad_array_new_length ();
if (outer_nelts_check != NULL_TREE)
size = build3 (COND_EXPR, sizetype, outer_nelts_check, size, errval);
- size = cp_fully_fold (size);
+ size = fold_to_constant (size);
/* Create the argument list. */
vec_safe_insert (*placement, 0, size);
/* Do name-lookup to find the appropriate operator. */
@@ -3462,7 +3462,7 @@ build_new_1 (vec<tree, va_gc> **placement, tree type, tree nelts,
outer_nelts_check = NULL_TREE;
}
- size = cp_fully_fold (size);
+ size = fold_to_constant (size);
/* If size is zero e.g. due to type having zero size, try to
preserve outer_nelts for constant expression evaluation
purposes. */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 7f23efd..1aa35d3 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -1180,10 +1180,13 @@ finish_expr_stmt (tree expr)
expr = error_mark_node;
/* Simplification of inner statement expressions, compound exprs,
- etc can result in us already having an EXPR_STMT. */
+ etc can result in us already having an EXPR_STMT or other statement
+ tree. Don't wrap them in EXPR_STMT. */
if (TREE_CODE (expr) != CLEANUP_POINT_EXPR)
{
- if (TREE_CODE (expr) != EXPR_STMT)
+ if (TREE_CODE (expr) != EXPR_STMT
+ && !STATEMENT_CLASS_P (expr)
+ && TREE_CODE (expr) != STATEMENT_LIST)
expr = build_stmt (loc, EXPR_STMT, expr);
expr = maybe_cleanup_point_expr_void (expr);
}
@@ -3082,6 +3085,7 @@ finish_stmt_expr_expr (tree expr, tree stmt_expr)
}
else if (processing_template_decl)
{
+ /* Not finish_expr_stmt because we don't want convert_to_void. */
expr = build_stmt (input_location, EXPR_STMT, expr);
expr = add_stmt (expr);
/* Mark the last statement so that we can recognize it as such at
diff --git a/gcc/except.cc b/gcc/except.cc
index 205811c..0fe1e09 100644
--- a/gcc/except.cc
+++ b/gcc/except.cc
@@ -2949,7 +2949,14 @@ switch_to_exception_section (const char * ARG_UNUSED (fnname))
{
section *s;
- if (exception_section)
+ if (exception_section
+ /* Don't use the cached section for comdat if it will be different. */
+#ifdef HAVE_LD_EH_GC_SECTIONS
+ && !(targetm_common.have_named_sections
+ && DECL_COMDAT_GROUP (current_function_decl)
+ && HAVE_COMDAT_GROUP)
+#endif
+ )
s = exception_section;
else
{
diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 1c45bdb..56325a9 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,10 @@
+2025-04-19 Steven G. Kargl <kargl@gcc.gnu.org>
+
+ PR fortran/119836
+ * resolve.cc (check_pure_function): Fix checking for
+ an impure subprogram within a DO CONCURRENT construct.
+ (pure_subroutine): Ditto.
+
2025-04-16 Harald Anlauf <anlauf@gmx.de>
PR fortran/106948
diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index 2ecbd50..f03708e 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -3260,14 +3260,30 @@ static bool check_pure_function (gfc_expr *e)
gfc_do_concurrent_flag = 0 when the check for an impure function
occurs. Check the stack to see if the source code has a nested
BLOCK construct. */
+
for (stack = cs_base; stack; stack = stack->prev)
{
- if (stack->current->op == EXEC_BLOCK) saw_block = true;
+ if (!saw_block && stack->current->op == EXEC_BLOCK)
+ {
+ saw_block = true;
+ continue;
+ }
+
if (saw_block && stack->current->op == EXEC_DO_CONCURRENT)
{
- gfc_error ("Reference to impure function at %L inside a "
- "DO CONCURRENT", &e->where);
- return false;
+ bool is_pure;
+ is_pure = (e->value.function.isym
+ && (e->value.function.isym->pure
+ || e->value.function.isym->elemental))
+ || (e->value.function.esym
+ && (e->value.function.esym->attr.pure
+ || e->value.function.esym->attr.elemental));
+ if (!is_pure)
+ {
+ gfc_error ("Reference to impure function at %L inside a "
+ "DO CONCURRENT", &e->where);
+ return false;
+ }
}
}
@@ -3663,16 +3679,29 @@ pure_subroutine (gfc_symbol *sym, const char *name, locus *loc)
/* A BLOCK construct within a DO CONCURRENT construct leads to
gfc_do_concurrent_flag = 0 when the check for an impure subroutine
- occurs. Check the stack to see if the source code has a nested
- BLOCK construct. */
+ occurs. Walk up the stack to see if the source code has a nested
+ construct. */
+
for (stack = cs_base; stack; stack = stack->prev)
{
- if (stack->current->op == EXEC_BLOCK) saw_block = true;
+ if (stack->current->op == EXEC_BLOCK)
+ {
+ saw_block = true;
+ continue;
+ }
+
if (saw_block && stack->current->op == EXEC_DO_CONCURRENT)
{
- gfc_error ("Subroutine call at %L in a DO CONCURRENT block "
- "is not PURE", loc);
- return false;
+
+ bool is_pure = true;
+ is_pure = sym->attr.pure || sym->attr.elemental;
+
+ if (!is_pure)
+ {
+ gfc_error ("Subroutine call at %L in a DO CONCURRENT block "
+ "is not PURE", loc);
+ return false;
+ }
}
}
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4cdc9c1..4c219bd 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,78 @@
+2025-04-21 Jason Merrill <jason@redhat.com>
+
+ PR c++/118775
+ * g++.dg/cpp2a/constexpr-new24.C: Adjust diagnostic.
+
+2025-04-21 Andrew Bennett <andrew.bennett@imgtec.com>
+
+ * gcc.dg/memcpy-4.c: Remove mips specific code.
+ * gcc.target/mips/memcpy-2.c: New test.
+
+2025-04-21 Matthew Fortune <matthew.fortune@imgtec.com>
+
+ * gcc.target/mips/clear-cache-1.c: Also allow jrc.
+
+2025-04-21 Matthew Fortune <matthew.fortune@imgtec.com>
+
+ * gcc.dg/tree-ssa/ssa-dom-cse-2.c: Do not check output for
+ MIPS lp64 abi.
+
+2025-04-21 Andrew Pinski <quic_apinski@quicinc.com>
+
+ PR middle-end/119507
+ * g++.dg/eh/pr119507.C: New test.
+
+2025-04-21 hongtao.liu <hongtao.liu@intel.com>
+
+ * gcc.target/i386/recip-vec-divf-fma.c: New test.
+
+2025-04-20 H.J. Lu <hjl.tools@gmail.com>
+
+ PR target/117863
+ * gcc.dg/rtl/i386/vector_eq-2.c: New test.
+ * gcc.dg/rtl/i386/vector_eq-3.c: Likewise.
+
+2025-04-19 Thomas Schwinge <tschwinge@baylibre.com>
+
+ PR testsuite/119508
+ * rust/compile/nr2/compile.exp: Disable parallel testing.
+
+2025-04-19 Co-authored-by: Jeff Law <jlaw@ventanamicro.com>
+
+ PR target/118410
+ * gcc.target/riscv/pr118410-1.c: New test.
+ * gcc.target/riscv/pr118410-2.c: Likewise.
+
+2025-04-19 Andrew Pinski <quic_apinski@quicinc.com>
+
+ * gcc.dg/pr118947-1.c: Use 1025 as the size of the buf.
+ * gcc.dg/pr78408-3.c: Likewise.
+
+2025-04-19 Andrew Pinski <quic_apinski@quicinc.com>
+
+ PR rtl-optimization/111949
+ * gcc.target/aarch64/bic-1.c: New test.
+
+2025-04-19 Jiaxun Yang <jiaxun.yang@flygoat.com>
+
+ PR target/111814
+ * gcc.target/sh/pr111814.c: New test.
+
+2025-04-19 Maciej W. Rozycki <macro@orcam.me.uk>
+
+ * gcc.target/alpha/memcpy-nested-offset-long.c: New file.
+ * gcc.target/alpha/memcpy-nested-offset-quad.c: New file.
+
+2025-04-19 Steven G. Kargl <kargl@gcc.gnu.org>
+
+ PR fortran/119836
+ * gfortran.dg/do_concurrent_all_clauses.f90: Remove invalid
+ dg-error test.
+ * gfortran.dg/pr119836_1.f90: New test.
+ * gfortran.dg/pr119836_2.f90: New test.
+ * gfortran.dg/pr119836_3.f90: New test.
+ * gfortran.dg/pr119836_4.f90: New test.
+
2025-04-18 Thomas Schwinge <tschwinge@baylibre.com>
PR cobol/119818
diff --git a/gcc/testsuite/g++.dg/cpp2a/constexpr-new24.C b/gcc/testsuite/g++.dg/cpp2a/constexpr-new24.C
index ee62f18..17c9f54 100644
--- a/gcc/testsuite/g++.dg/cpp2a/constexpr-new24.C
+++ b/gcc/testsuite/g++.dg/cpp2a/constexpr-new24.C
@@ -6,14 +6,14 @@ int a;
constexpr char *
f1 ()
{
- constexpr auto p = new char[(long int) &a]; // { dg-error "size not constant" }
+ constexpr auto p = new char[(long int) &a]; // { dg-error "conversion from pointer" }
return p;
}
constexpr char *
f2 ()
{
- auto p = new char[(long int) &a]; // { dg-error "size not constant" }
+ auto p = new char[(long int) &a]; // { dg-error "conversion from pointer" }
return p;
}
diff --git a/gcc/testsuite/g++.dg/eh/pr119507.C b/gcc/testsuite/g++.dg/eh/pr119507.C
new file mode 100644
index 0000000..c68536f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/eh/pr119507.C
@@ -0,0 +1,19 @@
+// { dg-do compile { target comdat_group } }
+// ARM EABI has its own exception handling data handling and does not use gcc_except_table
+// { dg-skip-if "!TARGET_EXCEPTION_DATA" { arm_eabi } }
+// Force off function sections
+// Force on exceptions
+// { dg-options "-fno-function-sections -fexceptions" }
+// PR middle-end/119507
+
+
+inline int comdat() { try { throw 1; } catch (int) { return 1; } return 0; }
+int another_func_with_exception() { try { throw 1; } catch (int) { return 1; } return 0; }
+inline int comdat1() { try { throw 1; } catch (int) { return 1; } return 0; }
+int foo() { return comdat() + comdat1(); }
+
+// Make sure the gcc puts the exception table for both comdat and comdat1 in their own section
+// { dg-final { scan-assembler-times ".section\[\t \]\[^\n\]*.gcc_except_table._Z6comdatv" 1 } }
+// { dg-final { scan-assembler-times ".section\[\t \]\[^\n\]*.gcc_except_table._Z7comdat1v" 1 } }
+// There should be 3 exception tables,
+// { dg-final { scan-assembler-times ".section\[\t \]\[^\n\]*.gcc_except_table" 3 } }
diff --git a/gcc/testsuite/gcc.dg/memcpy-4.c b/gcc/testsuite/gcc.dg/memcpy-4.c
index 4c726f0..b17b369 100644
--- a/gcc/testsuite/gcc.dg/memcpy-4.c
+++ b/gcc/testsuite/gcc.dg/memcpy-4.c
@@ -1,13 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-rtl-expand" } */
+/* { dg-options "-O2" } */
-#ifdef __mips
-__attribute__((nomips16))
-#endif
void
f1 (char *p)
{
__builtin_memcpy (p, "12345", 5);
}
-
-/* { dg-final { scan-rtl-dump "mem/u.*mem/u" "expand" { target mips*-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/pr118947-1.c b/gcc/testsuite/gcc.dg/pr118947-1.c
index 70b7f80..8733e8d 100644
--- a/gcc/testsuite/gcc.dg/pr118947-1.c
+++ b/gcc/testsuite/gcc.dg/pr118947-1.c
@@ -6,10 +6,10 @@
void* aaa();
void* bbb()
{
- char buf[32] = {};
+ char buf[1025] = {};
/* Tha call to aaa should not matter and clobber buf. */
void* ret = aaa();
- __builtin_memcpy(ret, buf, 32);
+ __builtin_memcpy(ret, buf, sizeof(buf));
return ret;
}
diff --git a/gcc/testsuite/gcc.dg/pr78408-3.c b/gcc/testsuite/gcc.dg/pr78408-3.c
index 3de90d0..5ea5458 100644
--- a/gcc/testsuite/gcc.dg/pr78408-3.c
+++ b/gcc/testsuite/gcc.dg/pr78408-3.c
@@ -7,8 +7,8 @@ void* aaa();
void* bbb()
{
void* ret = aaa();
- char buf[32] = {};
- __builtin_memcpy(ret, buf, 32);
+ char buf[1025] = {};
+ __builtin_memcpy(ret, buf, sizeof(buf));
return ret;
}
diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c
new file mode 100644
index 0000000..871d489
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c
@@ -0,0 +1,71 @@
+/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-additional-options "-O2 -march=x86-64-v3" } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef int v2di __attribute__((vector_size(16)));
+
+v4si __RTL (startwith ("vregs1")) foo1 (void)
+{
+(function "foo1"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V4SI <0>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+ (cinsn 4 (set (reg:V4SI <1>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+ (cinsn 5 (set (reg:V4SI <2>)
+ (eq:V4SI (reg:V4SI <0>) (reg:V4SI <1>))))
+ (cinsn 6 (set (reg:V4SI <3>) (reg:V4SI <2>)))
+ (cinsn 7 (set (reg:V4SI xmm0) (reg:V4SI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V4SI xmm0)))
+)
+}
+
+v8si __RTL (startwith ("vregs1")) foo2 (void)
+{
+(function "foo2"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V8SI <0>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+ (cinsn 4 (set (reg:V8SI <1>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+ (cinsn 5 (set (reg:V8SI <2>)
+ (eq:V8SI (reg:V8SI <0>) (reg:V8SI <1>))))
+ (cinsn 6 (set (reg:V8SI <3>) (reg:V8SI <2>)))
+ (cinsn 7 (set (reg:V8SI xmm0) (reg:V8SI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V8SI xmm0)))
+)
+}
+
+v2di __RTL (startwith ("vregs1")) foo3 (void)
+{
+(function "foo3"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V2DI <0>) (const_vector:V2DI [(const_int -1) (const_int -1)])))
+ (cinsn 4 (set (reg:V2DI <1>) (const_vector:V2DI [(const_int -1) (const_int -1)])))
+ (cinsn 5 (set (reg:V2DI <2>)
+ (eq:V2DI (reg:V2DI <0>) (reg:V2DI <1>))))
+ (cinsn 6 (set (reg:V2DI <3>) (reg:V2DI <2>)))
+ (cinsn 7 (set (reg:V2DI xmm0) (reg:V2DI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V2DI xmm0)))
+)
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */
diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c
new file mode 100644
index 0000000..276c4c2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c
@@ -0,0 +1,74 @@
+/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-additional-options "-O2 -march=x86-64-v3" } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef int v2di __attribute__((vector_size(16)));
+
+v4si __RTL (startwith ("vregs1")) foo1 (void)
+{
+(function "foo1"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V4SI <1>)
+ (mem:V4SI (reg:SI di) [0 ptr S128 A128])))
+ (cinsn 4 (set (reg:V4SI <2>)
+ (eq:V4SI (reg:V4SI <1>)
+ (mem:V4SI (reg:SI di) [0 ptr S128 A128]))))
+ (cinsn 5 (set (reg:V4SI <3>) (reg:V4SI <2>)))
+ (cinsn 6 (set (reg:V4SI xmm0) (reg:V4SI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V4SI xmm0)))
+)
+}
+
+v8si __RTL (startwith ("vregs1")) foo2 (void)
+{
+(function "foo2"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V8SI <1>)
+ (mem:V8SI (reg:SI di) [0 ptr S256 A256])))
+ (cinsn 4 (set (reg:V8SI <2>)
+ (eq:V8SI (mem:V8SI (reg:SI di) [0 ptr S256 A256])
+ (reg:V8SI <1>))))
+ (cinsn 5 (set (reg:V8SI <3>) (reg:V8SI <2>)))
+ (cinsn 6 (set (reg:V8SI xmm0) (reg:V8SI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V8SI xmm0)))
+)
+}
+
+v2di __RTL (startwith ("vregs1")) foo3 (void)
+{
+(function "foo3"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V2DI <1>)
+ (mem:V2DI (reg:SI di) [0 ptr S128 A128])))
+ (cinsn 4 (set (reg:V2DI <2>)
+ (eq:V2DI (reg:V2DI <1>)
+ (mem:V2DI (reg:SI di) [0 ptr S128 A128]))))
+ (cinsn 5 (set (reg:V2DI <3>) (reg:V2DI <2>)))
+ (cinsn 6 (set (reg:V2DI xmm0) (reg:V2DI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V2DI xmm0)))
+)
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
index a879d30..6fa52f6 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
@@ -27,4 +27,4 @@ foo ()
but the loop reads only one element at a time, and DOM cannot resolve these.
The same happens on powerpc depending on the SIMD support available. */
-/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* nvptx*-*-* mmix-knuth-mmixware } || { { { lp64 && { powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */
+/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* nvptx*-*-* mmix-knuth-mmixware } || { { { lp64 && { mips*-*-* powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/bic-1.c b/gcc/testsuite/gcc.target/aarch64/bic-1.c
new file mode 100644
index 0000000..65e1514
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic-1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/* PR rtl-optmization/111949 */
+
+/*
+**func1:
+** bic w([0-9]+), w0, w1
+** and w0, w\1, 1
+** ret
+*/
+
+unsigned func1(unsigned a, bool b)
+{
+ int c = a & b;
+ return (c ^ a)&1;
+}
+
+/*
+**func2:
+** bic w([0-9]+), w1, w0
+** and w0, w\1, 255
+** ret
+*/
+unsigned func2(bool a, bool b)
+{
+ return ~a & b;
+}
+
+/*
+**func3:
+** bic w([0-9]+), w1, w0
+** and w0, w\1, 1
+** ret
+*/
+bool func3(bool a, unsigned char b)
+{
+ return !a & b;
+}
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-long.c b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-long.c
new file mode 100644
index 0000000..631d14f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-long.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+typedef unsigned int __attribute__ ((mode (DI))) int64_t;
+typedef unsigned int __attribute__ ((mode (SI))) int32_t;
+
+typedef union
+ {
+ int32_t l[8];
+ }
+val;
+
+typedef struct
+ {
+ int32_t l[2];
+ val v;
+ }
+tre;
+
+typedef struct
+ {
+ int32_t l[3];
+ tre t;
+ }
+due;
+
+typedef struct
+ {
+ val v;
+ int64_t q;
+ int32_t l[2];
+ due d;
+ }
+uno;
+
+void
+memcpy_nested_offset_long (uno *u)
+{
+ u->d.t.v = u->v;
+}
+
+/* Expect assembly such as:
+
+ ldq $4,0($16)
+ ldq $3,8($16)
+ ldq $2,16($16)
+ srl $4,32,$7
+ ldq $1,24($16)
+ srl $3,32,$6
+ stl $4,68($16)
+ srl $2,32,$5
+ stl $7,72($16)
+ srl $1,32,$4
+ stl $3,76($16)
+ stl $6,80($16)
+ stl $2,84($16)
+ stl $5,88($16)
+ stl $1,92($16)
+ stl $4,96($16)
+
+ that is with four quadword loads at offsets 0, 8, 16, 24 each and
+ eight longword stores at offsets 68, 72, 76, 80, 84, 88, 92, 96 each. */
+
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,0\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,8\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,16\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,24\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,68\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,72\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,76\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,80\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,84\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,88\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,92\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,96\\\(\\\$16\\\)\\s" 1 } } */
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-quad.c b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-quad.c
new file mode 100644
index 0000000..1d2227e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-quad.c
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+typedef unsigned int __attribute__ ((mode (DI))) int64_t;
+typedef unsigned int __attribute__ ((mode (SI))) int32_t;
+
+typedef union
+ {
+ int32_t l[8];
+ }
+val;
+
+typedef struct
+ {
+ int32_t l[2];
+ val v;
+ }
+tre;
+
+typedef struct
+ {
+ int32_t l[3];
+ tre t;
+ }
+due;
+
+typedef struct
+ {
+ val v;
+ int64_t q;
+ int32_t l[3];
+ due d;
+ }
+uno;
+
+void
+memcpy_nested_offset_quad (uno *u)
+{
+ u->d.t.v = u->v;
+}
+
+/* Expect assembly such as:
+
+ ldq $4,0($16)
+ ldq $3,8($16)
+ ldq $2,16($16)
+ ldq $1,24($16)
+ stq $4,72($16)
+ stq $3,80($16)
+ stq $2,88($16)
+ stq $1,96($16)
+
+ that is with four quadword loads at offsets 0, 8, 16, 24 each
+ and four quadword stores at offsets 72, 80, 88, 96 each. */
+
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,0\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,8\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,16\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,24\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,72\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,80\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,88\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,96\\\(\\\$16\\\)\\s" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c b/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c
new file mode 100644
index 0000000..ad9e07b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mfma -mavx2" } */
+/* { dg-final { scan-assembler-times {(?n)vfn?m(add|sub)[1-3]*ps} 2 } } */
+
+typedef float v4sf __attribute__((vector_size(16)));
+/* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a */
+
+v4sf
+foo (v4sf a, v4sf b)
+{
+ return a / b;
+}
diff --git a/gcc/testsuite/gcc.target/mips/clear-cache-1.c b/gcc/testsuite/gcc.target/mips/clear-cache-1.c
index f1554f5..cd11c66 100644
--- a/gcc/testsuite/gcc.target/mips/clear-cache-1.c
+++ b/gcc/testsuite/gcc.target/mips/clear-cache-1.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-msynci isa_rev>=2" } */
/* { dg-final { scan-assembler "\tsynci\t" } } */
-/* { dg-final { scan-assembler "\tjr.hb\t" } } */
+/* { dg-final { scan-assembler "\tjrc?.hb\t" } } */
/* { dg-final { scan-assembler-not "_flush_cache|mips_sync_icache|_cacheflush" } } */
NOMIPS16 void f()
diff --git a/gcc/testsuite/gcc.target/mips/memcpy-2.c b/gcc/testsuite/gcc.target/mips/memcpy-2.c
new file mode 100644
index 0000000..df0cd18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/memcpy-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "isa_rev<=5 -fdump-rtl-expand" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-Os" } { "" } } */
+
+__attribute__((nomips16))
+void
+f1 (char *p)
+{
+ __builtin_memcpy (p, "12345", 5);
+}
+
+/* { dg-final { scan-rtl-dump "mem/u.*mem/u" "expand" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/power11-3.c b/gcc/testsuite/gcc.target/powerpc/power11-3.c
index fa1aedd..56bf881 100644
--- a/gcc/testsuite/gcc.target/powerpc/power11-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/power11-3.c
@@ -1,5 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+/* { dg-require-ifunc "" } */
/* Check if we can set the power11 target via a target_clones attribute. */
diff --git a/gcc/testsuite/gcc.target/riscv/pr118410-1.c b/gcc/testsuite/gcc.target/riscv/pr118410-1.c
new file mode 100644
index 0000000..4a8b847
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr118410-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64} } } */
+/* { dg-options "-march=rv32gcb -mabi=ilp32" { target { rv32} } } */
+
+long orlow(long x) { return x | ((1L << 24) - 1); }
+
+/* { dg-final { scan-assembler-times "orn\t" 1 } } */
+/* { dg-final { scan-assembler-not "addi\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr118410-2.c b/gcc/testsuite/gcc.target/riscv/pr118410-2.c
new file mode 100644
index 0000000..b63a1d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr118410-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64} } } */
+/* { dg-options "-march=rv32gcb -mabi=ilp32" { target { rv32} } } */
+
+long xorlow(long x) { return x ^ ((1L << 24) - 1); }
+
+/* { dg-final { scan-assembler-times "xnor\t" 1 } } */
+/* { dg-final { scan-assembler-not "addi\t" } } */
diff --git a/gcc/testsuite/gcc.target/sh/pr111814.c b/gcc/testsuite/gcc.target/sh/pr111814.c
new file mode 100644
index 0000000..a88e5d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sh/pr111814.c
@@ -0,0 +1,7 @@
+/* Verify that __builtin_nan("") produces a constant matches
+ architecture specification. */
+/* { dg-do compile } */
+
+double d = __builtin_nan ("");
+
+/* { dg-final { scan-assembler "\t.long\t-1\n\t.long\t2146959359\n" } } */
diff --git a/gcc/testsuite/gfortran.dg/do_concurrent_all_clauses.f90 b/gcc/testsuite/gfortran.dg/do_concurrent_all_clauses.f90
index 0c8a6ad..a7fa7c3 100644
--- a/gcc/testsuite/gfortran.dg/do_concurrent_all_clauses.f90
+++ b/gcc/testsuite/gfortran.dg/do_concurrent_all_clauses.f90
@@ -18,7 +18,7 @@ program do_concurrent_all_clauses
squared = i * i
arr(i) = temp2 + squared
sum = sum + arr(i)
- max_val = max(max_val, arr(i)) ! { dg-error "Reference to impure function" }
+ max_val = max(max_val, arr(i))
end block
end do
print *, arr, sum, max_val
diff --git a/gcc/testsuite/gfortran.dg/pr119836_1.f90 b/gcc/testsuite/gfortran.dg/pr119836_1.f90
new file mode 100644
index 0000000..984e2d0
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr119836_1.f90
@@ -0,0 +1,18 @@
+!
+! { dg-do run }
+!
+! PR fortran/119836
+!
+program p
+ implicit none
+ integer, parameter :: n = 4
+ integer :: i
+ integer :: y(n), x(n)
+ do concurrent (i=1:n)
+ x(i) = shiftl (i,1) ! accepted
+ block
+ y(i) = shiftl (i,1) ! wrongly rejected
+ end block
+ end do
+ if (any(x /= y)) stop 1
+end program p
diff --git a/gcc/testsuite/gfortran.dg/pr119836_2.f90 b/gcc/testsuite/gfortran.dg/pr119836_2.f90
new file mode 100644
index 0000000..5e2d0c9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr119836_2.f90
@@ -0,0 +1,21 @@
+!
+! { dg-do compile }
+!
+! PR fortran/119836
+!
+! Although intrinsic functions contained within the Fortran standard
+! are pure procedures, many of the additional intrinsic functions
+! supplied in libgfortran are impure. RAND() is one such function.
+!
+program foo
+ implicit none
+ integer i
+ real x(4)
+ do concurrent (i=1:4)
+ x = rand() ! { dg-error "Reference to impure function" }
+ block
+ x = rand() ! { dg-error "Reference to impure function" }
+ end block
+ end do
+ print *, x
+end program foo
diff --git a/gcc/testsuite/gfortran.dg/pr119836_3.f90 b/gcc/testsuite/gfortran.dg/pr119836_3.f90
new file mode 100644
index 0000000..69a5fcf
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr119836_3.f90
@@ -0,0 +1,30 @@
+!
+! { dg-do run }
+!
+! PR fortran/119836
+!
+program p
+ implicit none
+ integer, parameter :: n = 4
+ integer :: i
+ integer :: y(n), x(n)
+ x = [(i,i=1,n)]
+ do concurrent (i=1:n)
+ call bar(x, y)
+ end do
+ if (any(x /= y)) stop 1
+ x = 2 * x
+ do concurrent (i=1:n)
+ block
+ call bar(x, y)
+ end block
+ end do
+ if (any(x /= y)) stop 1
+
+ contains
+ elemental subroutine bar(x, y)
+ integer, intent(in) :: x
+ integer, intent(out) :: y
+ y = x
+ end subroutine
+end program p
diff --git a/gcc/testsuite/gfortran.dg/pr119836_4.f90 b/gcc/testsuite/gfortran.dg/pr119836_4.f90
new file mode 100644
index 0000000..dc6f72b
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr119836_4.f90
@@ -0,0 +1,30 @@
+!
+! { dg-do compile }
+!
+! PR fortran/119836
+!
+program p
+ implicit none
+ integer, parameter :: n = 4
+ integer :: i
+ integer :: y(n), x(n)
+ x = [(i,i=1,n)]
+ do concurrent (i=1:n)
+ call bar(x, y) ! { dg-error "Subroutine call" }
+ end do
+ if (any(x /= y)) stop 1
+ x = 2 * x
+ do concurrent (i=1:n)
+ block
+ call bar(x, y) ! { dg-error "Subroutine call" }
+ end block
+ end do
+ if (any(x /= y)) stop 1
+
+ contains
+ subroutine bar(x, y)
+ integer, intent(in) :: x(:)
+ integer, intent(out) :: y(:)
+ y = x
+ end subroutine
+end program p
diff --git a/gcc/testsuite/rust/compile/nr2/compile.exp b/gcc/testsuite/rust/compile/nr2/compile.exp
index 4d91dd0..9e15cdd 100644
--- a/gcc/testsuite/rust/compile/nr2/compile.exp
+++ b/gcc/testsuite/rust/compile/nr2/compile.exp
@@ -19,6 +19,15 @@
# Load support procs.
load_lib rust-dg.exp
+# These tests don't run runtest_file_p consistently if it
+# doesn't return the same values, so disable parallelization
+# of this *.exp file. The first parallel runtest to reach
+# this will run all the tests serially.
+if ![gcc_parallel_test_run_p compile] {
+ return
+}
+gcc_parallel_test_enable 0
+
# Initialize `dg'.
dg-init
@@ -136,3 +145,5 @@ namespace eval rust-nr2-ns {
# All done.
dg-finish
+
+gcc_parallel_test_enable 1
diff --git a/gcc/vec.h b/gcc/vec.h
index 915df06..eae4b0f 100644
--- a/gcc/vec.h
+++ b/gcc/vec.h
@@ -2395,11 +2395,11 @@ public:
array_slice (vec<OtherT, A, vl_embed> *v)
: m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {}
- iterator begin () { return m_base; }
- iterator end () { return m_base + m_size; }
+ iterator begin () { gcc_checking_assert (is_valid ()); return m_base; }
+ iterator end () { gcc_checking_assert (is_valid ()); return m_base + m_size; }
- const_iterator begin () const { return m_base; }
- const_iterator end () const { return m_base + m_size; }
+ const_iterator begin () const { gcc_checking_assert (is_valid ()); return m_base; }
+ const_iterator end () const { gcc_checking_assert (is_valid ()); return m_base + m_size; }
value_type &front ();
value_type &back ();
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 946bf13..66feed5 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,37 @@
+2025-04-19 Jiaxun Yang <jiaxun.yang@flygoat.com>
+
+ PR target/118257
+ * config/sh/sfp-machine.h (_FPU_GETCW): Implement with builtin.
+ (_FPU_SETCW): Likewise.
+ (FP_EX_ENABLE_SHIFT): Derive from arch spec.
+ (FP_EX_CAUSE_SHIFT): Likewise.
+ (FP_RND_MASK): Likewise.
+ (FP_EX_INVALID): Likewise.
+ (FP_EX_DIVZERO): Likewise.
+ (FP_EX_ALL): Likewise.
+ (FP_EX_OVERFLOW): Likewise.
+ (FP_EX_UNDERFLOW): Likewise.
+ (FP_EX_INEXACT): Likewise.
+ (_FP_DECL_EX): Declear default FCSR value.
+ (FP_RND_NEAREST): Derive from arch spec.
+ (FP_RND_ZERO): Likewise.
+ (FP_INIT_ROUNDMODE): Likewise.
+ (FP_ROUNDMODE): Likewise.
+ (FP_TRAPPING_EXCEPTIONS): Likewise.
+ (FP_HANDLE_EXCEPTIONS): Implement with _FPU_SETCW.
+
+2025-04-19 Jiaxun Yang <jiaxun.yang@flygoat.com>
+
+ PR target/111814
+ * config/sh/sfp-machine.h (_FP_NANFRAC_B): Reverse signaling bit.
+ (_FP_NANFRAC_H): Likewise.
+ (_FP_NANFRAC_S): Likewise.
+ (_FP_NANFRAC_D): Likewise.
+ (_FP_NANFRAC_Q): Likewise.
+ (_FP_KEEPNANFRACP): Enable for target.
+ (_FP_QNANNEGATEDP): Enable for target.
+ (_FP_CHOOSENAN): Port from MIPS.
+
2025-04-14 Thomas Schwinge <tschwinge@baylibre.com>
PR target/118794
diff --git a/libgcc/config/sh/sfp-machine.h b/libgcc/config/sh/sfp-machine.h
index 66984d4..8030c80 100644
--- a/libgcc/config/sh/sfp-machine.h
+++ b/libgcc/config/sh/sfp-machine.h
@@ -39,11 +39,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
-#define _FP_NANFRAC_B _FP_QNANBIT_B
-#define _FP_NANFRAC_H _FP_QNANBIT_H
-#define _FP_NANFRAC_S _FP_QNANBIT_S
-#define _FP_NANFRAC_D _FP_QNANBIT_D, 0
-#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0
+#define _FP_NANFRAC_B (_FP_QNANBIT_B - 1)
+#define _FP_NANFRAC_H (_FP_QNANBIT_H - 1)
+#define _FP_NANFRAC_S (_FP_QNANBIT_S - 1)
+#define _FP_NANFRAC_D (_FP_QNANBIT_D - 1), -1
+#define _FP_NANFRAC_Q (_FP_QNANBIT_Q - 1), -1, -1, -1
/* The type of the result of a floating point comparison. This must
match __libgcc_cmp_return__ in GCC for the target. */
@@ -56,15 +56,71 @@ typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
#define _FP_NANSIGN_D 0
#define _FP_NANSIGN_Q 0
-#define _FP_KEEPNANFRACP 0
-#define _FP_QNANNEGATEDP 0
+#define _FP_KEEPNANFRACP 1
+#define _FP_QNANNEGATEDP 1
+
+/* X is chosen unless one of the NaNs is sNaN. */
+# define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+ do { \
+ if ((_FP_FRAC_HIGH_RAW_##fs(X) | \
+ _FP_FRAC_HIGH_RAW_##fs(Y)) & _FP_QNANBIT_##fs) \
+ { \
+ R##_s = _FP_NANSIGN_##fs; \
+ _FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs); \
+ } \
+ else \
+ { \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R,X); \
+ } \
+ R##_c = FP_CLS_NAN; \
+ } while (0)
+
+#ifdef __SH_FPU_ANY__
+#define _FPU_GETCW(fpscr) fpscr = __builtin_sh_get_fpscr ()
+#define _FPU_SETCW(fpscr) __builtin_sh_set_fpscr (fpscr)
+#define FP_EX_ENABLE_SHIFT 5
+#define FP_EX_CAUSE_SHIFT 10
-#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
- do { \
- R##_s = _FP_NANSIGN_##fs; \
- _FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs); \
- R##_c = FP_CLS_NAN; \
+#define FP_EX_INVALID 0x0040
+#define FP_EX_DIVZERO 0x0020
+#if defined (__SH2E__)
+#define FP_EX_ALL (FP_EX_DIVZERO | FP_EX_INVALID)
+#else
+#define FP_EX_OVERFLOW 0x0010
+#define FP_EX_UNDERFLOW 0x0008
+#define FP_EX_INEXACT 0x0004
+#define FP_EX_ALL (FP_EX_DIVZERO | FP_EX_INEXACT | \
+ FP_EX_INVALID | FP_EX_OVERFLOW | FP_EX_UNDERFLOW)
+#endif
+#define _FP_DECL_EX \
+ unsigned int _fcsr __attribute__ ((unused)) = FP_RND_NEAREST
+/* Rounding modes. */
+#define FP_RND_NEAREST 0x0
+#define FP_RND_ZERO 0x1
+/* Placeholder, hardware does not have PINF/MINF modes. */
+#define FP_RND_PINF 0x2
+#define FP_RND_MINF 0x3
+#define FP_RND_MASK 3
+
+#define FP_INIT_ROUNDMODE _FPU_GETCW (_fcsr)
+#define FP_ROUNDMODE (_fcsr & FP_RND_MASK)
+#define FP_TRAPPING_EXCEPTIONS ((_fcsr >> FP_EX_ENABLE_SHIFT) & FP_EX_ALL)
+#define FP_HANDLE_EXCEPTIONS \
+ do { \
+ _fcsr &= ~(FP_EX_ALL << FP_EX_CAUSE_SHIFT); \
+ _fcsr |= _fex | (_fex << FP_EX_CAUSE_SHIFT); \
+ _FPU_SETCW (_fcsr); \
} while (0)
+#else
+#define FP_EX_INVALID (1 << 4)
+#define FP_EX_DIVZERO (1 << 3)
+#if !defined (__SH2E__)
+#define FP_EX_OVERFLOW (1 << 2)
+#define FP_EX_UNDERFLOW (1 << 1)
+#define FP_EX_INEXACT (1 << 0)
+#endif
+#endif
#define _FP_TININESS_AFTER_ROUNDING 1
diff --git a/libgcobol/ChangeLog b/libgcobol/ChangeLog
index 6a0e961..9de1714 100644
--- a/libgcobol/ChangeLog
+++ b/libgcobol/ChangeLog
@@ -1,3 +1,10 @@
+2025-04-21 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
+
+ * configure.ac: Check for struct tm.tm_zone.
+ * configure, config.h.in: Regenerate.
+ * intrinsic.cc (__gg__formatted_current_date): Guard tm.tm_zone
+ use with HAVE_STRUCT_TM_TM_ZONE.
+
2025-04-15 Andreas Schwab <schwab@suse.de>
* configure.tgt: Set LIBGCOBOL_SUPPORTED for riscv64-*-linux* with
diff --git a/libgcobol/config.h.in b/libgcobol/config.h.in
index 6a53279..fdf5e3e 100644
--- a/libgcobol/config.h.in
+++ b/libgcobol/config.h.in
@@ -72,6 +72,9 @@
/* Define to 1 if you have the `strtof128' function. */
#undef HAVE_STRTOF128
+/* Define to 1 if `tm_zone' is a member of `struct tm'. */
+#undef HAVE_STRUCT_TM_TM_ZONE
+
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
diff --git a/libgcobol/configure b/libgcobol/configure
index e83119d..6821591 100755
--- a/libgcobol/configure
+++ b/libgcobol/configure
@@ -2449,6 +2449,63 @@ $as_echo "$ac_res" >&6; }
eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_cxx_check_func
+
+# ac_fn_cxx_check_member LINENO AGGR MEMBER VAR INCLUDES
+# ------------------------------------------------------
+# Tries to find if the field MEMBER exists in type AGGR, after including
+# INCLUDES, setting cache variable VAR accordingly.
+ac_fn_cxx_check_member ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2.$3" >&5
+$as_echo_n "checking for $2.$3... " >&6; }
+if eval \${$4+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$5
+int
+main ()
+{
+static $2 ac_aggr;
+if (ac_aggr.$3)
+return 0;
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ eval "$4=yes"
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$5
+int
+main ()
+{
+static $2 ac_aggr;
+if (sizeof ac_aggr.$3)
+return 0;
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ eval "$4=yes"
+else
+ eval "$4=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$4
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_cxx_check_member
cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
@@ -11693,7 +11750,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11696 "configure"
+#line 11753 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -11799,7 +11856,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11802 "configure"
+#line 11859 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -17434,6 +17491,19 @@ $as_echo "#define USE_IEC_60559 1" >>confdefs.h
+# struct tm tm_zone is a POSIX.1-2024 addition.
+ac_fn_cxx_check_member "$LINENO" "struct tm" "tm_zone" "ac_cv_member_struct_tm_tm_zone" "#include <time.h>
+"
+if test "x$ac_cv_member_struct_tm_tm_zone" = xyes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_STRUCT_TM_TM_ZONE 1
+_ACEOF
+
+
+fi
+
+
if test "${multilib}" = "yes"; then
multilib_arg="--enable-multilib"
else
diff --git a/libgcobol/configure.ac b/libgcobol/configure.ac
index a1e9513..4bb6905 100644
--- a/libgcobol/configure.ac
+++ b/libgcobol/configure.ac
@@ -231,6 +231,9 @@ elif test "${ENABLE_LIBQUADMATH_SUPPORT}" = "default" ; then
fi
LIBGCOBOL_CHECK_FLOAT128
+# struct tm tm_zone is a POSIX.1-2024 addition.
+AC_CHECK_MEMBERS([struct tm.tm_zone],,,[#include <time.h>])
+
if test "${multilib}" = "yes"; then
multilib_arg="--enable-multilib"
else
diff --git a/libgcobol/intrinsic.cc b/libgcobol/intrinsic.cc
index 181b053..97f2bdc 100644
--- a/libgcobol/intrinsic.cc
+++ b/libgcobol/intrinsic.cc
@@ -1482,7 +1482,9 @@ __gg__formatted_current_date( cblc_field_t *dest, // Destination string
__gg__clock_gettime(CLOCK_REALTIME, &ts);
struct tm tm = {};
+#ifdef HAVE_STRUCT_TM_TM_ZONE
tm.tm_zone = "GMT";
+#endif
if( is_zulu )
{
gmtime_r(&ts.tv_sec, &tm);