aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog43
-rw-r--r--gcc/DATESTAMP2
-rw-r--r--gcc/config/i386/i386-expand.cc44
-rw-r--r--gcc/config/i386/i386.cc64
-rw-r--r--gcc/config/i386/i386.h6
-rw-r--r--gcc/config/i386/x86-tune-costs.h121
-rw-r--r--gcc/config/riscv/bitmanip.md38
-rw-r--r--gcc/config/riscv/riscv.cc3
-rw-r--r--gcc/cp/ChangeLog9
-rw-r--r--gcc/fortran/ChangeLog7
-rw-r--r--gcc/testsuite/ChangeLog47
-rw-r--r--gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c71
-rw-r--r--gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c74
-rw-r--r--gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c12
-rw-r--r--gcc/testsuite/gcc.target/riscv/pr118410-1.c9
-rw-r--r--gcc/testsuite/gcc.target/riscv/pr118410-2.c9
-rw-r--r--gcc/testsuite/rust/compile/nr2/compile.exp11
17 files changed, 541 insertions, 29 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 264bbd2..4a2a79f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,46 @@
+2025-04-19 Jeff Law <jlaw@ventanamicro.com>
+
+ PR target/119865
+ * config/riscv/riscv.cc (parse_features_for_version): Do not
+ explicitly free the architecture string.
+
+2025-04-19 Jeff Law <jlaw@ventanamicro.com>
+
+ PR target/118410
+ * config/riscv/bitmanip.md (logical with constant argument): New
+ splitter for cases where synthesizing ~C is cheaper than synthesizing
+ the original constant C.
+
+2025-04-19 Jan Hubicka <hubicka@ucw.cz>
+
+ * config/i386/i386.cc (vec_fp_conversion_cost): New function.
+ (ix86_rtx_costs): Use it for SSE/AVX FP conversoins.
+ (ix86_builtin_vectorization_cost): Fix indentation;
+ and use vec_fp_conversion_cost in vec_promote_demote.
+ (fp_conversion_stmt_cost): New function.
+ (ix86_vector_costs::add_stmt_cost): Use it to cost NOP_EXPR
+ and vec_promote_demote.
+ * config/i386/i386.h (struct processor_costs):
+ * config/i386/x86-tune-costs.h (struct processor_costs):
+
+2025-04-19 Andrew Pinski <quic_apinski@quicinc.com>
+
+ PR rtl-optimization/111949
+ * combine.cc (find_split_point): Add a split point
+ for `(and (not X) Y)` if not in the outer set already.
+
+2025-04-19 Jiaxun Yang <jiaxun.yang@flygoat.com>
+
+ PR target/111814
+ * config/sh/sh-modes.def (RESET_FLOAT_FORMAT): Use mips format.
+ (FLOAT_MODE): Use mips mode.
+
+2025-04-19 Maciej W. Rozycki <macro@orcam.me.uk>
+
+ * config/alpha/alpha.cc
+ (alpha_get_mem_rtx_alignment_and_offset): Recurse into
+ COMPONENT_REF nodes.
+
2025-04-18 Jeff Law <jlaw@ventanamicro.com>
* config/riscv/bitmanip.md (*bext<mode>_mask_pos): New pattern
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index f0d1b43..2aaf995 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20250419
+20250421
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index cdfd94d..36f71eb 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -19256,8 +19256,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
e1 = gen_reg_rtx (mode);
x1 = gen_reg_rtx (mode);
- /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
-
b = force_reg (mode, b);
/* x0 = rcp(b) estimate */
@@ -19270,20 +19268,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
UNSPEC_RCP)));
- /* e0 = x0 * b */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
+ unsigned vector_size = GET_MODE_SIZE (mode);
+
+ /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a
+ N-R step with 2 fma implementation. */
+ if (TARGET_FMA
+ || (TARGET_AVX512F && vector_size == 64)
+ || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
+ {
+ /* e0 = x0 * a */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
+ /* e1 = e0 * b - a */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b,
+ gen_rtx_NEG (mode, a))));
+ /* res = - e1 * x0 + e0 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode,
+ gen_rtx_NEG (mode, e1),
+ x0, e0)));
+ }
+ else
+ /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
+ {
+ /* e0 = x0 * b */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
- /* e0 = x0 * e0 */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
+ /* e1 = x0 + x0 */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
- /* e1 = x0 + x0 */
- emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
+ /* e0 = x0 * e0 */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
- /* x1 = e1 - e0 */
- emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
+ /* x1 = e1 - e0 */
+ emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
- /* res = a * x1 */
- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ /* res = a * x1 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ }
}
/* Output code to perform a Newton-Rhapson approximation of a
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 38df84f..28603c2 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -100,6 +100,7 @@ along with GCC; see the file COPYING3. If not see
#include "i386-features.h"
#include "function-abi.h"
#include "rtl-error.h"
+#include "gimple-pretty-print.h"
/* This file should be included last. */
#include "target-def.h"
@@ -21816,6 +21817,25 @@ ix86_insn_cost (rtx_insn *insn, bool speed)
return insn_cost + pattern_cost (PATTERN (insn), speed);
}
+/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */
+
+static int
+vec_fp_conversion_cost (const struct processor_costs *cost, int size)
+{
+ if (size < 128)
+ return cost->cvtss2sd;
+ else if (size < 256)
+ {
+ if (TARGET_SSE_SPLIT_REGS)
+ return cost->cvtss2sd * size / 64;
+ return cost->cvtss2sd;
+ }
+ if (size < 512)
+ return cost->vcvtps2pd256;
+ else
+ return cost->vcvtps2pd512;
+}
+
/* Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
scanned. In either case, *TOTAL contains the cost result. */
@@ -22479,17 +22499,18 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
return false;
case FLOAT_EXTEND:
+ /* x87 represents all values extended to 80bit. */
if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
*total = 0;
else
- *total = ix86_vec_cost (mode, cost->addss);
+ *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
return false;
case FLOAT_TRUNCATE:
if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
*total = cost->fadd;
else
- *total = ix86_vec_cost (mode, cost->addss);
+ *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
return false;
case ABS:
@@ -24683,7 +24704,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
switch (type_of_cost)
{
case scalar_stmt:
- return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
+ return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
case scalar_load:
/* load/store costs are relative to register move which is 2. Recompute
@@ -24754,7 +24775,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
return ix86_cost->cond_not_taken_branch_cost;
case vec_perm:
+ return ix86_vec_cost (mode, ix86_cost->sse_op);
+
case vec_promote_demote:
+ if (fp)
+ return vec_fp_conversion_cost (ix86_tune_cost, mode);
return ix86_vec_cost (mode, ix86_cost->sse_op);
case vec_construct:
@@ -25232,6 +25257,32 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
return new ix86_vector_costs (vinfo, costing_for_scalar);
}
+/* Return cost of statement doing FP conversion. */
+
+static unsigned
+fp_conversion_stmt_cost (machine_mode mode, gimple *stmt, bool scalar_p)
+{
+ int outer_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_lhs (stmt))));
+ int inner_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_rhs1 (stmt))));
+ int stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end
+ up doing two conversions and packing them. */
+ if (!scalar_p && inner_size > outer_size)
+ {
+ int n = inner_size / outer_size;
+ stmt_cost = stmt_cost * n
+ + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ return stmt_cost;
+}
+
unsigned
ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree node,
@@ -25342,6 +25393,9 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
(TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
stmt_cost = 0;
+ else if (fp)
+ stmt_cost = fp_conversion_stmt_cost (mode, stmt_info->stmt,
+ scalar_p);
break;
case BIT_IOR_EXPR:
@@ -25383,6 +25437,10 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
break;
}
+ if (kind == vec_promote_demote
+ && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
+ stmt_cost = fp_conversion_stmt_cost (mode, stmt_info->stmt, scalar_p);
+
/* If we do elementwise loads into a vector then we are bound by
latency and execution resources for the many scalar loads
(AGU and load ports). Try to account for this by scaling the
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8507243..18aa42d 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -207,6 +207,12 @@ struct processor_costs {
const int divsd; /* cost of DIVSD instructions. */
const int sqrtss; /* cost of SQRTSS instructions. */
const int sqrtsd; /* cost of SQRTSD instructions. */
+ const int cvtss2sd; /* cost SSE FP conversions,
+ such as CVTSS2SD. */
+ const int vcvtps2pd256; /* cost 256bit packed FP conversions,
+ such as VCVTPD2PS with larger reg in ymm. */
+ const int vcvtps2pd512; /* cost 512bit packed FP conversions,
+ such as VCVTPD2PS with larger reg in zmm. */
const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
/* Specify reassociation width for integer,
fp, vector integer and vector fp
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 9477345..cddcf61 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -121,16 +121,19 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
COSTS_N_BYTES (2), /* cost of FCHS instruction. */
COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
- COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */
- COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */
- COSTS_N_BYTES (2), /* cost of MULSS instruction. */
- COSTS_N_BYTES (2), /* cost of MULSD instruction. */
- COSTS_N_BYTES (2), /* cost of FMA SS instruction. */
- COSTS_N_BYTES (2), /* cost of FMA SD instruction. */
- COSTS_N_BYTES (2), /* cost of DIVSS instruction. */
- COSTS_N_BYTES (2), /* cost of DIVSD instruction. */
- COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */
- COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */
+ COSTS_N_BYTES (4), /* cost of cheap SSE instruction. */
+ COSTS_N_BYTES (4), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_BYTES (4), /* cost of MULSS instruction. */
+ COSTS_N_BYTES (4), /* cost of MULSD instruction. */
+ COSTS_N_BYTES (4), /* cost of FMA SS instruction. */
+ COSTS_N_BYTES (4), /* cost of FMA SD instruction. */
+ COSTS_N_BYTES (4), /* cost of DIVSS instruction. */
+ COSTS_N_BYTES (4), /* cost of DIVSD instruction. */
+ COSTS_N_BYTES (4), /* cost of SQRTSS instruction. */
+ COSTS_N_BYTES (4), /* cost of SQRTSD instruction. */
+ COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
ix86_size_memcpy,
ix86_size_memset,
@@ -243,6 +246,9 @@ struct processor_costs i386_cost = { /* 386 specific costs */
COSTS_N_INSNS (88), /* cost of DIVSD instruction. */
COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i386_memcpy,
i386_memset,
@@ -356,6 +362,9 @@ struct processor_costs i486_cost = { /* 486 specific costs */
COSTS_N_INSNS (74), /* cost of DIVSD instruction. */
COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i486_memcpy,
i486_memset,
@@ -467,6 +476,9 @@ struct processor_costs pentium_cost = {
COSTS_N_INSNS (39), /* cost of DIVSD instruction. */
COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -571,6 +583,9 @@ struct processor_costs lakemont_cost = {
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -690,6 +705,9 @@ struct processor_costs pentiumpro_cost = {
COSTS_N_INSNS (18), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentiumpro_memcpy,
pentiumpro_memset,
@@ -800,6 +818,9 @@ struct processor_costs geode_cost = {
COSTS_N_INSNS (47), /* cost of DIVSD instruction. */
COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
geode_memcpy,
geode_memset,
@@ -913,6 +934,9 @@ struct processor_costs k6_cost = {
COSTS_N_INSNS (56), /* cost of DIVSD instruction. */
COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k6_memcpy,
k6_memset,
@@ -1027,6 +1051,9 @@ struct processor_costs athlon_cost = {
COSTS_N_INSNS (24), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
athlon_memcpy,
athlon_memset,
@@ -1150,6 +1177,9 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k8_memcpy,
k8_memset,
@@ -1281,6 +1311,9 @@ struct processor_costs amdfam10_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
amdfam10_memcpy,
amdfam10_memset,
@@ -1405,6 +1438,9 @@ const struct processor_costs bdver_cost = {
COSTS_N_INSNS (27), /* cost of DIVSD instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver_memcpy,
bdver_memset,
@@ -1553,6 +1589,10 @@ struct processor_costs znver1_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ /* Real latency is 4, but for split regs multiply cost of half op by 2. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
/* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
and it can execute 2 integer additions and 2 multiplications thus
reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests
@@ -1712,6 +1752,9 @@ struct processor_costs znver2_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to with of 6.
@@ -1847,6 +1890,9 @@ struct processor_costs znver3_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to with of 6.
@@ -1984,6 +2030,10 @@ struct processor_costs znver4_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ /* Real latency is 6, but for split regs multiply cost of half op by 2. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to with of 6.
@@ -2135,6 +2185,9 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
/* DIVSD has throughtput 0.13 and latency 20. */
COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
/* Zen5 can execute:
- integer ops: 6 per cycle, at most 3 multiplications.
latency 1 for additions, 3 for multiplications (pipelined)
@@ -2274,6 +2327,9 @@ struct processor_costs skylake_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
skylake_memcpy,
skylake_memset,
@@ -2403,6 +2459,9 @@ struct processor_costs icelake_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
icelake_memcpy,
icelake_memset,
@@ -2526,6 +2585,9 @@ struct processor_costs alderlake_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
alderlake_memcpy,
alderlake_memset,
@@ -2642,6 +2704,9 @@ const struct processor_costs btver1_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver1_memcpy,
btver1_memset,
@@ -2755,6 +2820,9 @@ const struct processor_costs btver2_cost = {
COSTS_N_INSNS (19), /* cost of DIVSD instruction. */
COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver2_memcpy,
btver2_memset,
@@ -2867,6 +2935,9 @@ struct processor_costs pentium4_cost = {
COSTS_N_INSNS (38), /* cost of DIVSD instruction. */
COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium4_memcpy,
pentium4_memset,
@@ -2982,6 +3053,9 @@ struct processor_costs nocona_cost = {
COSTS_N_INSNS (40), /* cost of DIVSD instruction. */
COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
nocona_memcpy,
nocona_memset,
@@ -3095,6 +3169,9 @@ struct processor_costs atom_cost = {
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
atom_memcpy,
atom_memset,
@@ -3208,6 +3285,9 @@ struct processor_costs slm_cost = {
COSTS_N_INSNS (69), /* cost of DIVSD instruction. */
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
slm_memcpy,
slm_memset,
@@ -3335,6 +3415,9 @@ struct processor_costs tremont_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
tremont_memcpy,
tremont_memset,
@@ -3448,6 +3531,9 @@ struct processor_costs intel_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
intel_memcpy,
intel_memset,
@@ -3566,6 +3652,9 @@ struct processor_costs lujiazui_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (60), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
lujiazui_memcpy,
lujiazui_memset,
@@ -3682,6 +3771,9 @@ struct processor_costs yongfeng_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
yongfeng_memcpy,
yongfeng_memset,
@@ -3798,6 +3890,9 @@ struct processor_costs shijidadao_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (11), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
shijidadao_memcpy,
shijidadao_memset,
@@ -3922,6 +4017,9 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
generic_memcpy,
generic_memset,
@@ -4051,6 +4149,9 @@ struct processor_costs core_cost = {
COSTS_N_INSNS (32), /* cost of DIVSD instruction. */
COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
core_memcpy,
core_memset,
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 2a3884c..d0919ec 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -1263,3 +1263,41 @@
expand_crc_using_clmul (<SUBX:MODE>mode, <SUBX1:MODE>mode, operands);
DONE;
})
+
+;; If we have an XOR/IOR with a constant operand (C) and the we can
+;; synthesize ~C more efficiently than C, then synthesize ~C and use
+;; xnor/orn instead.
+;;
+;; The same can be done for AND, but mvconst_internal's issues get in
+;; the way. That's future work.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (any_or:X (match_operand:X 1 "register_operand")
+ (match_operand:X 2 "const_int_operand")))
+ (clobber (match_operand:X 3 "register_operand"))]
+ "TARGET_ZBB
+ && (riscv_const_insns (operands[2], true)
+ > riscv_const_insns (GEN_INT (~INTVAL (operands[2])), true))"
+ [(const_int 0)]
+{
+ /* Get the inverted constant into the temporary register. */
+ riscv_emit_move (operands[3], GEN_INT (~INTVAL (operands[2])));
+
+ /* For xnor, the NOT operation is in a different position. So
+ we have to customize the split code we generate a bit.
+
+ It is expected that AND will be handled like IOR in the future. */
+ if (<CODE> == XOR)
+ {
+ rtx x = gen_rtx_XOR (<X:MODE>mode, operands[1], operands[3]);
+ x = gen_rtx_NOT (<X:MODE>mode, x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+ else
+ {
+ rtx x = gen_rtx_NOT (<X:MODE>mode, operands[3]);
+ x = gen_rtx_IOR (<X:MODE>mode, x, operands[1]);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+ DONE;
+})
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d3656a7..bad59e2 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -13136,9 +13136,6 @@ parse_features_for_version (tree decl,
DECL_SOURCE_LOCATION (decl));
gcc_assert (parse_res);
- if (arch_string != default_opts->x_riscv_arch_string)
- free (CONST_CAST (void *, (const void *) arch_string));
-
cl_target_option_restore (&global_options, &global_options_set,
&cur_target);
}
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index 644b36a..6975efb 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,12 @@
+2025-04-19 Jason Merrill <jason@redhat.com>
+
+ * coroutines.cc (coro_build_expr_stmt)
+ (coro_build_cvt_void_expr_stmt): Remove.
+ (build_actor_fn): Use finish_expr_stmt.
+ * semantics.cc (finish_expr_stmt): Avoid wrapping statement in
+ EXPR_STMT.
+ (finish_stmt_expr_expr): Add comment.
+
2025-04-17 Jason Merrill <jason@redhat.com>
* constexpr.cc (is_valid_constexpr_fn): Improve diagnostic.
diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 1c45bdb..56325a9 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,10 @@
+2025-04-19 Steven G. Kargl <kargl@gcc.gnu.org>
+
+ PR fortran/119836
+ * resolve.cc (check_pure_function): Fix checking for
+ an impure subprogram within a DO CONCURRENT construct.
+ (pure_subroutine): Ditto.
+
2025-04-16 Harald Anlauf <anlauf@gmx.de>
PR fortran/106948
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4cdc9c1..521176a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,50 @@
+2025-04-20 H.J. Lu <hjl.tools@gmail.com>
+
+ PR target/117863
+ * gcc.dg/rtl/i386/vector_eq-2.c: New test.
+ * gcc.dg/rtl/i386/vector_eq-3.c: Likewise.
+
+2025-04-19 Thomas Schwinge <tschwinge@baylibre.com>
+
+ PR testsuite/119508
+ * rust/compile/nr2/compile.exp: Disable parallel testing.
+
+2025-04-19 Co-authored-by: Jeff Law <jlaw@ventanamicro.com>
+
+ PR target/118410
+ * gcc.target/riscv/pr118410-1.c: New test.
+ * gcc.target/riscv/pr118410-2.c: Likewise.
+
+2025-04-19 Andrew Pinski <quic_apinski@quicinc.com>
+
+ * gcc.dg/pr118947-1.c: Use 1025 as the size of the buf.
+ * gcc.dg/pr78408-3.c: Likewise.
+
+2025-04-19 Andrew Pinski <quic_apinski@quicinc.com>
+
+ PR rtl-optimization/111949
+ * gcc.target/aarch64/bic-1.c: New test.
+
+2025-04-19 Jiaxun Yang <jiaxun.yang@flygoat.com>
+
+ PR target/111814
+ * gcc.target/sh/pr111814.c: New test.
+
+2025-04-19 Maciej W. Rozycki <macro@orcam.me.uk>
+
+ * gcc.target/alpha/memcpy-nested-offset-long.c: New file.
+ * gcc.target/alpha/memcpy-nested-offset-quad.c: New file.
+
+2025-04-19 Steven G. Kargl <kargl@gcc.gnu.org>
+
+ PR fortran/119836
+ * gfortran.dg/do_concurrent_all_clauses.f90: Remove invalid
+ dg-error test.
+ * gfortran.dg/pr119836_1.f90: New test.
+ * gfortran.dg/pr119836_2.f90: New test.
+ * gfortran.dg/pr119836_3.f90: New test.
+ * gfortran.dg/pr119836_4.f90: New test.
+
2025-04-18 Thomas Schwinge <tschwinge@baylibre.com>
PR cobol/119818
diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c
new file mode 100644
index 0000000..871d489
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c
@@ -0,0 +1,71 @@
+/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-additional-options "-O2 -march=x86-64-v3" } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef int v2di __attribute__((vector_size(16)));
+
+v4si __RTL (startwith ("vregs1")) foo1 (void)
+{
+(function "foo1"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V4SI <0>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+ (cinsn 4 (set (reg:V4SI <1>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+ (cinsn 5 (set (reg:V4SI <2>)
+ (eq:V4SI (reg:V4SI <0>) (reg:V4SI <1>))))
+ (cinsn 6 (set (reg:V4SI <3>) (reg:V4SI <2>)))
+ (cinsn 7 (set (reg:V4SI xmm0) (reg:V4SI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V4SI xmm0)))
+)
+}
+
+v8si __RTL (startwith ("vregs1")) foo2 (void)
+{
+(function "foo2"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V8SI <0>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+ (cinsn 4 (set (reg:V8SI <1>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+ (cinsn 5 (set (reg:V8SI <2>)
+ (eq:V8SI (reg:V8SI <0>) (reg:V8SI <1>))))
+ (cinsn 6 (set (reg:V8SI <3>) (reg:V8SI <2>)))
+ (cinsn 7 (set (reg:V8SI xmm0) (reg:V8SI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V8SI xmm0)))
+)
+}
+
+v2di __RTL (startwith ("vregs1")) foo3 (void)
+{
+(function "foo3"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V2DI <0>) (const_vector:V2DI [(const_int -1) (const_int -1)])))
+ (cinsn 4 (set (reg:V2DI <1>) (const_vector:V2DI [(const_int -1) (const_int -1)])))
+ (cinsn 5 (set (reg:V2DI <2>)
+ (eq:V2DI (reg:V2DI <0>) (reg:V2DI <1>))))
+ (cinsn 6 (set (reg:V2DI <3>) (reg:V2DI <2>)))
+ (cinsn 7 (set (reg:V2DI xmm0) (reg:V2DI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V2DI xmm0)))
+)
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */
diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c
new file mode 100644
index 0000000..276c4c2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c
@@ -0,0 +1,74 @@
+/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-additional-options "-O2 -march=x86-64-v3" } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef int v2di __attribute__((vector_size(16)));
+
+v4si __RTL (startwith ("vregs1")) foo1 (void)
+{
+(function "foo1"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V4SI <1>)
+ (mem:V4SI (reg:SI di) [0 ptr S128 A128])))
+ (cinsn 4 (set (reg:V4SI <2>)
+ (eq:V4SI (reg:V4SI <1>)
+ (mem:V4SI (reg:SI di) [0 ptr S128 A128]))))
+ (cinsn 5 (set (reg:V4SI <3>) (reg:V4SI <2>)))
+ (cinsn 6 (set (reg:V4SI xmm0) (reg:V4SI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V4SI xmm0)))
+)
+}
+
+v8si __RTL (startwith ("vregs1")) foo2 (void)
+{
+(function "foo2"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V8SI <1>)
+ (mem:V8SI (reg:SI di) [0 ptr S256 A256])))
+ (cinsn 4 (set (reg:V8SI <2>)
+ (eq:V8SI (mem:V8SI (reg:SI di) [0 ptr S256 A256])
+ (reg:V8SI <1>))))
+ (cinsn 5 (set (reg:V8SI <3>) (reg:V8SI <2>)))
+ (cinsn 6 (set (reg:V8SI xmm0) (reg:V8SI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V8SI xmm0)))
+)
+}
+
+v2di __RTL (startwith ("vregs1")) foo3 (void)
+{
+(function "foo3"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V2DI <1>)
+ (mem:V2DI (reg:SI di) [0 ptr S128 A128])))
+ (cinsn 4 (set (reg:V2DI <2>)
+ (eq:V2DI (reg:V2DI <1>)
+ (mem:V2DI (reg:SI di) [0 ptr S128 A128]))))
+ (cinsn 5 (set (reg:V2DI <3>) (reg:V2DI <2>)))
+ (cinsn 6 (set (reg:V2DI xmm0) (reg:V2DI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V2DI xmm0)))
+)
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */
diff --git a/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c b/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c
new file mode 100644
index 0000000..ad9e07b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mfma -mavx2" } */
+/* { dg-final { scan-assembler-times {(?n)vfn?m(add|sub)[1-3]*ps} 2 } } */
+
+typedef float v4sf __attribute__((vector_size(16)));
+/* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a */
+
+v4sf
+foo (v4sf a, v4sf b)
+{
+ return a / b;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/pr118410-1.c b/gcc/testsuite/gcc.target/riscv/pr118410-1.c
new file mode 100644
index 0000000..4a8b847
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr118410-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64} } } */
+/* { dg-options "-march=rv32gcb -mabi=ilp32" { target { rv32} } } */
+
+long orlow(long x) { return x | ((1L << 24) - 1); }
+
+/* { dg-final { scan-assembler-times "orn\t" 1 } } */
+/* { dg-final { scan-assembler-not "addi\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr118410-2.c b/gcc/testsuite/gcc.target/riscv/pr118410-2.c
new file mode 100644
index 0000000..b63a1d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr118410-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64} } } */
+/* { dg-options "-march=rv32gcb -mabi=ilp32" { target { rv32} } } */
+
+long xorlow(long x) { return x ^ ((1L << 24) - 1); }
+
+/* { dg-final { scan-assembler-times "xnor\t" 1 } } */
+/* { dg-final { scan-assembler-not "addi\t" } } */
diff --git a/gcc/testsuite/rust/compile/nr2/compile.exp b/gcc/testsuite/rust/compile/nr2/compile.exp
index 4d91dd0..9e15cdd 100644
--- a/gcc/testsuite/rust/compile/nr2/compile.exp
+++ b/gcc/testsuite/rust/compile/nr2/compile.exp
@@ -19,6 +19,15 @@
# Load support procs.
load_lib rust-dg.exp
+# These tests don't run runtest_file_p consistently if it
+# doesn't return the same values, so disable parallelization
+# of this *.exp file. The first parallel runtest to reach
+# this will run all the tests serially.
+if ![gcc_parallel_test_run_p compile] {
+ return
+}
+gcc_parallel_test_enable 0
+
# Initialize `dg'.
dg-init
@@ -136,3 +145,5 @@ namespace eval rust-nr2-ns {
# All done.
dg-finish
+
+gcc_parallel_test_enable 1