diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 43 | ||||
-rw-r--r-- | gcc/DATESTAMP | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386-expand.cc | 44 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 64 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 6 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune-costs.h | 121 | ||||
-rw-r--r-- | gcc/config/riscv/bitmanip.md | 38 | ||||
-rw-r--r-- | gcc/config/riscv/riscv.cc | 3 | ||||
-rw-r--r-- | gcc/cp/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/fortran/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 47 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c | 71 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c | 74 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/pr118410-1.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/pr118410-2.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/nr2/compile.exp | 11 |
17 files changed, 541 insertions, 29 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 264bbd2..4a2a79f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,46 @@ +2025-04-19 Jeff Law <jlaw@ventanamicro.com> + + PR target/119865 + * config/riscv/riscv.cc (parse_features_for_version): Do not + explicitly free the architecture string. + +2025-04-19 Jeff Law <jlaw@ventanamicro.com> + + PR target/118410 + * config/riscv/bitmanip.md (logical with constant argument): New + splitter for cases where synthesizing ~C is cheaper than synthesizing + the original constant C. + +2025-04-19 Jan Hubicka <hubicka@ucw.cz> + + * config/i386/i386.cc (vec_fp_conversion_cost): New function. + (ix86_rtx_costs): Use it for SSE/AVX FP conversoins. + (ix86_builtin_vectorization_cost): Fix indentation; + and use vec_fp_conversion_cost in vec_promote_demote. + (fp_conversion_stmt_cost): New function. + (ix86_vector_costs::add_stmt_cost): Use it to cost NOP_EXPR + and vec_promote_demote. + * config/i386/i386.h (struct processor_costs): + * config/i386/x86-tune-costs.h (struct processor_costs): + +2025-04-19 Andrew Pinski <quic_apinski@quicinc.com> + + PR rtl-optimization/111949 + * combine.cc (find_split_point): Add a split point + for `(and (not X) Y)` if not in the outer set already. + +2025-04-19 Jiaxun Yang <jiaxun.yang@flygoat.com> + + PR target/111814 + * config/sh/sh-modes.def (RESET_FLOAT_FORMAT): Use mips format. + (FLOAT_MODE): Use mips mode. + +2025-04-19 Maciej W. Rozycki <macro@orcam.me.uk> + + * config/alpha/alpha.cc + (alpha_get_mem_rtx_alignment_and_offset): Recurse into + COMPONENT_REF nodes. + 2025-04-18 Jeff Law <jlaw@ventanamicro.com> * config/riscv/bitmanip.md (*bext<mode>_mask_pos): New pattern diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index f0d1b43..2aaf995 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20250419 +20250421 diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index cdfd94d..36f71eb 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -19256,8 +19256,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) e1 = gen_reg_rtx (mode); x1 = gen_reg_rtx (mode); - /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ - b = force_reg (mode, b); /* x0 = rcp(b) estimate */ @@ -19270,20 +19268,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), UNSPEC_RCP))); - /* e0 = x0 * b */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); + unsigned vector_size = GET_MODE_SIZE (mode); + + /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a + N-R step with 2 fma implementation. */ + if (TARGET_FMA + || (TARGET_AVX512F && vector_size == 64) + || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))) + { + /* e0 = x0 * a */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); + /* e1 = e0 * b - a */ + emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b, + gen_rtx_NEG (mode, a)))); + /* res = - e1 * x0 + e0 */ + emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode, + gen_rtx_NEG (mode, e1), + x0, e0))); + } + else + /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ + { + /* e0 = x0 * b */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); - /* e0 = x0 * e0 */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); + /* e1 = x0 + x0 */ + emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); - /* e1 = x0 + x0 */ - emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); + /* e0 = x0 * e0 */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); - /* x1 = e1 - e0 */ - emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); + /* x1 = e1 - e0 */ + emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); - /* res = a * x1 */ - emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + /* res = a * x1 */ + emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + } } /* Output code to perform a Newton-Rhapson approximation of a diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 38df84f..28603c2 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -100,6 +100,7 @@ along with GCC; see the file COPYING3. If not see #include "i386-features.h" #include "function-abi.h" #include "rtl-error.h" +#include "gimple-pretty-print.h" /* This file should be included last. */ #include "target-def.h" @@ -21816,6 +21817,25 @@ ix86_insn_cost (rtx_insn *insn, bool speed) return insn_cost + pattern_cost (PATTERN (insn), speed); } +/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */ + +static int +vec_fp_conversion_cost (const struct processor_costs *cost, int size) +{ + if (size < 128) + return cost->cvtss2sd; + else if (size < 256) + { + if (TARGET_SSE_SPLIT_REGS) + return cost->cvtss2sd * size / 64; + return cost->cvtss2sd; + } + if (size < 512) + return cost->vcvtps2pd256; + else + return cost->vcvtps2pd512; +} + /* Compute a (partial) cost for rtx X. Return true if the complete cost has been computed, and false if subexpressions should be scanned. In either case, *TOTAL contains the cost result. */ @@ -22479,17 +22499,18 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, return false; case FLOAT_EXTEND: + /* x87 represents all values extended to 80bit. */ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) *total = 0; else - *total = ix86_vec_cost (mode, cost->addss); + *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); return false; case FLOAT_TRUNCATE: if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) *total = cost->fadd; else - *total = ix86_vec_cost (mode, cost->addss); + *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); return false; case ABS: @@ -24683,7 +24704,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, switch (type_of_cost) { case scalar_stmt: - return fp ? ix86_cost->addss : COSTS_N_INSNS (1); + return fp ? ix86_cost->addss : COSTS_N_INSNS (1); case scalar_load: /* load/store costs are relative to register move which is 2. Recompute @@ -24754,7 +24775,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, return ix86_cost->cond_not_taken_branch_cost; case vec_perm: + return ix86_vec_cost (mode, ix86_cost->sse_op); + case vec_promote_demote: + if (fp) + return vec_fp_conversion_cost (ix86_tune_cost, mode); return ix86_vec_cost (mode, ix86_cost->sse_op); case vec_construct: @@ -25232,6 +25257,32 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar) return new ix86_vector_costs (vinfo, costing_for_scalar); } +/* Return cost of statement doing FP conversion. */ + +static unsigned +fp_conversion_stmt_cost (machine_mode mode, gimple *stmt, bool scalar_p) +{ + int outer_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_lhs (stmt)))); + int inner_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_rhs1 (stmt)))); + int stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end + up doing two conversions and packing them. */ + if (!scalar_p && inner_size > outer_size) + { + int n = inner_size / outer_size; + stmt_cost = stmt_cost * n + + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op); + } + return stmt_cost; +} + unsigned ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, stmt_vec_info stmt_info, slp_tree node, @@ -25342,6 +25393,9 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) stmt_cost = 0; + else if (fp) + stmt_cost = fp_conversion_stmt_cost (mode, stmt_info->stmt, + scalar_p); break; case BIT_IOR_EXPR: @@ -25383,6 +25437,10 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, break; } + if (kind == vec_promote_demote + && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) + stmt_cost = fp_conversion_stmt_cost (mode, stmt_info->stmt, scalar_p); + /* If we do elementwise loads into a vector then we are bound by latency and execution resources for the many scalar loads (AGU and load ports). Try to account for this by scaling the diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 8507243..18aa42d 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -207,6 +207,12 @@ struct processor_costs { const int divsd; /* cost of DIVSD instructions. */ const int sqrtss; /* cost of SQRTSS instructions. */ const int sqrtsd; /* cost of SQRTSD instructions. */ + const int cvtss2sd; /* cost SSE FP conversions, + such as CVTSS2SD. */ + const int vcvtps2pd256; /* cost 256bit packed FP conversions, + such as VCVTPD2PS with larger reg in ymm. */ + const int vcvtps2pd512; /* cost 512bit packed FP conversions, + such as VCVTPD2PS with larger reg in zmm. */ const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp; /* Specify reassociation width for integer, fp, vector integer and vector fp diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 9477345..cddcf61 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -121,16 +121,19 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (2), /* cost of FCHS instruction. */ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ - COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */ - COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */ - COSTS_N_BYTES (2), /* cost of MULSS instruction. */ - COSTS_N_BYTES (2), /* cost of MULSD instruction. */ - COSTS_N_BYTES (2), /* cost of FMA SS instruction. */ - COSTS_N_BYTES (2), /* cost of FMA SD instruction. */ - COSTS_N_BYTES (2), /* cost of DIVSS instruction. */ - COSTS_N_BYTES (2), /* cost of DIVSD instruction. */ - COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */ - COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */ + COSTS_N_BYTES (4), /* cost of cheap SSE instruction. */ + COSTS_N_BYTES (4), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_BYTES (4), /* cost of MULSS instruction. */ + COSTS_N_BYTES (4), /* cost of MULSD instruction. */ + COSTS_N_BYTES (4), /* cost of FMA SS instruction. */ + COSTS_N_BYTES (4), /* cost of FMA SD instruction. */ + COSTS_N_BYTES (4), /* cost of DIVSS instruction. */ + COSTS_N_BYTES (4), /* cost of DIVSD instruction. */ + COSTS_N_BYTES (4), /* cost of SQRTSS instruction. */ + COSTS_N_BYTES (4), /* cost of SQRTSD instruction. */ + COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */ + COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ ix86_size_memcpy, ix86_size_memset, @@ -243,6 +246,9 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (88), /* cost of DIVSD instruction. */ COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i386_memcpy, i386_memset, @@ -356,6 +362,9 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (74), /* cost of DIVSD instruction. */ COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i486_memcpy, i486_memset, @@ -467,6 +476,9 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (39), /* cost of DIVSD instruction. */ COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -571,6 +583,9 @@ struct processor_costs lakemont_cost = { COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -690,6 +705,9 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (18), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentiumpro_memcpy, pentiumpro_memset, @@ -800,6 +818,9 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (47), /* cost of DIVSD instruction. */ COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ geode_memcpy, geode_memset, @@ -913,6 +934,9 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (56), /* cost of DIVSD instruction. */ COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k6_memcpy, k6_memset, @@ -1027,6 +1051,9 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (24), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ athlon_memcpy, athlon_memset, @@ -1150,6 +1177,9 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k8_memcpy, k8_memset, @@ -1281,6 +1311,9 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ amdfam10_memcpy, amdfam10_memset, @@ -1405,6 +1438,9 @@ const struct processor_costs bdver_cost = { COSTS_N_INSNS (27), /* cost of DIVSD instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ bdver_memcpy, bdver_memset, @@ -1553,6 +1589,10 @@ struct processor_costs znver1_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + /* Real latency is 4, but for split regs multiply cost of half op by 2. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests @@ -1712,6 +1752,9 @@ struct processor_costs znver2_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1847,6 +1890,9 @@ struct processor_costs znver3_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1984,6 +2030,10 @@ struct processor_costs znver4_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + /* Real latency is 6, but for split regs multiply cost of half op by 2. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -2135,6 +2185,9 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ /* DIVSD has throughtput 0.13 and latency 20. */ COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ /* Zen5 can execute: - integer ops: 6 per cycle, at most 3 multiplications. latency 1 for additions, 3 for multiplications (pipelined) @@ -2274,6 +2327,9 @@ struct processor_costs skylake_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ skylake_memcpy, skylake_memset, @@ -2403,6 +2459,9 @@ struct processor_costs icelake_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ icelake_memcpy, icelake_memset, @@ -2526,6 +2585,9 @@ struct processor_costs alderlake_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ alderlake_memcpy, alderlake_memset, @@ -2642,6 +2704,9 @@ const struct processor_costs btver1_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver1_memcpy, btver1_memset, @@ -2755,6 +2820,9 @@ const struct processor_costs btver2_cost = { COSTS_N_INSNS (19), /* cost of DIVSD instruction. */ COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver2_memcpy, btver2_memset, @@ -2867,6 +2935,9 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (38), /* cost of DIVSD instruction. */ COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium4_memcpy, pentium4_memset, @@ -2982,6 +3053,9 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (40), /* cost of DIVSD instruction. */ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ nocona_memcpy, nocona_memset, @@ -3095,6 +3169,9 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ atom_memcpy, atom_memset, @@ -3208,6 +3285,9 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (69), /* cost of DIVSD instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ slm_memcpy, slm_memset, @@ -3335,6 +3415,9 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ tremont_memcpy, tremont_memset, @@ -3448,6 +3531,9 @@ struct processor_costs intel_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ intel_memcpy, intel_memset, @@ -3566,6 +3652,9 @@ struct processor_costs lujiazui_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (60), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ lujiazui_memcpy, lujiazui_memset, @@ -3682,6 +3771,9 @@ struct processor_costs yongfeng_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ yongfeng_memcpy, yongfeng_memset, @@ -3798,6 +3890,9 @@ struct processor_costs shijidadao_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (11), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ shijidadao_memcpy, shijidadao_memset, @@ -3922,6 +4017,9 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ generic_memcpy, generic_memset, @@ -4051,6 +4149,9 @@ struct processor_costs core_cost = { COSTS_N_INSNS (32), /* cost of DIVSD instruction. */ COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ core_memcpy, core_memset, diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 2a3884c..d0919ec 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -1263,3 +1263,41 @@ expand_crc_using_clmul (<SUBX:MODE>mode, <SUBX1:MODE>mode, operands); DONE; }) + +;; If we have an XOR/IOR with a constant operand (C) and the we can +;; synthesize ~C more efficiently than C, then synthesize ~C and use +;; xnor/orn instead. +;; +;; The same can be done for AND, but mvconst_internal's issues get in +;; the way. That's future work. +(define_split + [(set (match_operand:X 0 "register_operand") + (any_or:X (match_operand:X 1 "register_operand") + (match_operand:X 2 "const_int_operand"))) + (clobber (match_operand:X 3 "register_operand"))] + "TARGET_ZBB + && (riscv_const_insns (operands[2], true) + > riscv_const_insns (GEN_INT (~INTVAL (operands[2])), true))" + [(const_int 0)] +{ + /* Get the inverted constant into the temporary register. */ + riscv_emit_move (operands[3], GEN_INT (~INTVAL (operands[2]))); + + /* For xnor, the NOT operation is in a different position. So + we have to customize the split code we generate a bit. + + It is expected that AND will be handled like IOR in the future. */ + if (<CODE> == XOR) + { + rtx x = gen_rtx_XOR (<X:MODE>mode, operands[1], operands[3]); + x = gen_rtx_NOT (<X:MODE>mode, x); + emit_insn (gen_rtx_SET (operands[0], x)); + } + else + { + rtx x = gen_rtx_NOT (<X:MODE>mode, operands[3]); + x = gen_rtx_IOR (<X:MODE>mode, x, operands[1]); + emit_insn (gen_rtx_SET (operands[0], x)); + } + DONE; +}) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index d3656a7..bad59e2 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -13136,9 +13136,6 @@ parse_features_for_version (tree decl, DECL_SOURCE_LOCATION (decl)); gcc_assert (parse_res); - if (arch_string != default_opts->x_riscv_arch_string) - free (CONST_CAST (void *, (const void *) arch_string)); - cl_target_option_restore (&global_options, &global_options_set, &cur_target); } diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 644b36a..6975efb 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,12 @@ +2025-04-19 Jason Merrill <jason@redhat.com> + + * coroutines.cc (coro_build_expr_stmt) + (coro_build_cvt_void_expr_stmt): Remove. + (build_actor_fn): Use finish_expr_stmt. + * semantics.cc (finish_expr_stmt): Avoid wrapping statement in + EXPR_STMT. + (finish_stmt_expr_expr): Add comment. + 2025-04-17 Jason Merrill <jason@redhat.com> * constexpr.cc (is_valid_constexpr_fn): Improve diagnostic. diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 1c45bdb..56325a9 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,10 @@ +2025-04-19 Steven G. Kargl <kargl@gcc.gnu.org> + + PR fortran/119836 + * resolve.cc (check_pure_function): Fix checking for + an impure subprogram within a DO CONCURRENT construct. + (pure_subroutine): Ditto. + 2025-04-16 Harald Anlauf <anlauf@gmx.de> PR fortran/106948 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4cdc9c1..521176a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,50 @@ +2025-04-20 H.J. Lu <hjl.tools@gmail.com> + + PR target/117863 + * gcc.dg/rtl/i386/vector_eq-2.c: New test. + * gcc.dg/rtl/i386/vector_eq-3.c: Likewise. + +2025-04-19 Thomas Schwinge <tschwinge@baylibre.com> + + PR testsuite/119508 + * rust/compile/nr2/compile.exp: Disable parallel testing. + +2025-04-19 Co-authored-by: Jeff Law <jlaw@ventanamicro.com> + + PR target/118410 + * gcc.target/riscv/pr118410-1.c: New test. + * gcc.target/riscv/pr118410-2.c: Likewise. + +2025-04-19 Andrew Pinski <quic_apinski@quicinc.com> + + * gcc.dg/pr118947-1.c: Use 1025 as the size of the buf. + * gcc.dg/pr78408-3.c: Likewise. + +2025-04-19 Andrew Pinski <quic_apinski@quicinc.com> + + PR rtl-optimization/111949 + * gcc.target/aarch64/bic-1.c: New test. + +2025-04-19 Jiaxun Yang <jiaxun.yang@flygoat.com> + + PR target/111814 + * gcc.target/sh/pr111814.c: New test. + +2025-04-19 Maciej W. Rozycki <macro@orcam.me.uk> + + * gcc.target/alpha/memcpy-nested-offset-long.c: New file. + * gcc.target/alpha/memcpy-nested-offset-quad.c: New file. + +2025-04-19 Steven G. Kargl <kargl@gcc.gnu.org> + + PR fortran/119836 + * gfortran.dg/do_concurrent_all_clauses.f90: Remove invalid + dg-error test. + * gfortran.dg/pr119836_1.f90: New test. + * gfortran.dg/pr119836_2.f90: New test. + * gfortran.dg/pr119836_3.f90: New test. + * gfortran.dg/pr119836_4.f90: New test. + 2025-04-18 Thomas Schwinge <tschwinge@baylibre.com> PR cobol/119818 diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c new file mode 100644 index 0000000..871d489 --- /dev/null +++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c @@ -0,0 +1,71 @@ +/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-additional-options "-O2 -march=x86-64-v3" } */ + +typedef int v4si __attribute__((vector_size(16))); +typedef int v8si __attribute__((vector_size(32))); +typedef int v2di __attribute__((vector_size(16))); + +v4si __RTL (startwith ("vregs1")) foo1 (void) +{ +(function "foo1" + (insn-chain + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cnote 2 NOTE_INSN_FUNCTION_BEG) + (cinsn 3 (set (reg:V4SI <0>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)]))) + (cinsn 4 (set (reg:V4SI <1>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)]))) + (cinsn 5 (set (reg:V4SI <2>) + (eq:V4SI (reg:V4SI <0>) (reg:V4SI <1>)))) + (cinsn 6 (set (reg:V4SI <3>) (reg:V4SI <2>))) + (cinsn 7 (set (reg:V4SI xmm0) (reg:V4SI <3>))) + (edge-to exit (flags "FALLTHRU")) + ) + ) + (crtl (return_rtx (reg/i:V4SI xmm0))) +) +} + +v8si __RTL (startwith ("vregs1")) foo2 (void) +{ +(function "foo2" + (insn-chain + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cnote 2 NOTE_INSN_FUNCTION_BEG) + (cinsn 3 (set (reg:V8SI <0>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)]))) + (cinsn 4 (set (reg:V8SI <1>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)]))) + (cinsn 5 (set (reg:V8SI <2>) + (eq:V8SI (reg:V8SI <0>) (reg:V8SI <1>)))) + (cinsn 6 (set (reg:V8SI <3>) (reg:V8SI <2>))) + (cinsn 7 (set (reg:V8SI xmm0) (reg:V8SI <3>))) + (edge-to exit (flags "FALLTHRU")) + ) + ) + (crtl (return_rtx (reg/i:V8SI xmm0))) +) +} + +v2di __RTL (startwith ("vregs1")) foo3 (void) +{ +(function "foo3" + (insn-chain + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cnote 2 NOTE_INSN_FUNCTION_BEG) + (cinsn 3 (set (reg:V2DI <0>) (const_vector:V2DI [(const_int -1) (const_int -1)]))) + (cinsn 4 (set (reg:V2DI <1>) (const_vector:V2DI [(const_int -1) (const_int -1)]))) + (cinsn 5 (set (reg:V2DI <2>) + (eq:V2DI (reg:V2DI <0>) (reg:V2DI <1>)))) + (cinsn 6 (set (reg:V2DI <3>) (reg:V2DI <2>))) + (cinsn 7 (set (reg:V2DI xmm0) (reg:V2DI <3>))) + (edge-to exit (flags "FALLTHRU")) + ) + ) + (crtl (return_rtx (reg/i:V2DI xmm0))) +) +} + +/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */ diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c new file mode 100644 index 0000000..276c4c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c @@ -0,0 +1,74 @@ +/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-additional-options "-O2 -march=x86-64-v3" } */ + +typedef int v4si __attribute__((vector_size(16))); +typedef int v8si __attribute__((vector_size(32))); +typedef int v2di __attribute__((vector_size(16))); + +v4si __RTL (startwith ("vregs1")) foo1 (void) +{ +(function "foo1" + (insn-chain + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cnote 2 NOTE_INSN_FUNCTION_BEG) + (cinsn 3 (set (reg:V4SI <1>) + (mem:V4SI (reg:SI di) [0 ptr S128 A128]))) + (cinsn 4 (set (reg:V4SI <2>) + (eq:V4SI (reg:V4SI <1>) + (mem:V4SI (reg:SI di) [0 ptr S128 A128])))) + (cinsn 5 (set (reg:V4SI <3>) (reg:V4SI <2>))) + (cinsn 6 (set (reg:V4SI xmm0) (reg:V4SI <3>))) + (edge-to exit (flags "FALLTHRU")) + ) + ) + (crtl (return_rtx (reg/i:V4SI xmm0))) +) +} + +v8si __RTL (startwith ("vregs1")) foo2 (void) +{ +(function "foo2" + (insn-chain + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cnote 2 NOTE_INSN_FUNCTION_BEG) + (cinsn 3 (set (reg:V8SI <1>) + (mem:V8SI (reg:SI di) [0 ptr S256 A256]))) + (cinsn 4 (set (reg:V8SI <2>) + (eq:V8SI (mem:V8SI (reg:SI di) [0 ptr S256 A256]) + (reg:V8SI <1>)))) + (cinsn 5 (set (reg:V8SI <3>) (reg:V8SI <2>))) + (cinsn 6 (set (reg:V8SI xmm0) (reg:V8SI <3>))) + (edge-to exit (flags "FALLTHRU")) + ) + ) + (crtl (return_rtx (reg/i:V8SI xmm0))) +) +} + +v2di __RTL (startwith ("vregs1")) foo3 (void) +{ +(function "foo3" + (insn-chain + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cnote 2 NOTE_INSN_FUNCTION_BEG) + (cinsn 3 (set (reg:V2DI <1>) + (mem:V2DI (reg:SI di) [0 ptr S128 A128]))) + (cinsn 4 (set (reg:V2DI <2>) + (eq:V2DI (reg:V2DI <1>) + (mem:V2DI (reg:SI di) [0 ptr S128 A128])))) + (cinsn 5 (set (reg:V2DI <3>) (reg:V2DI <2>))) + (cinsn 6 (set (reg:V2DI xmm0) (reg:V2DI <3>))) + (edge-to exit (flags "FALLTHRU")) + ) + ) + (crtl (return_rtx (reg/i:V2DI xmm0))) +) +} + +/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */ diff --git a/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c b/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c new file mode 100644 index 0000000..ad9e07b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mfma -mavx2" } */ +/* { dg-final { scan-assembler-times {(?n)vfn?m(add|sub)[1-3]*ps} 2 } } */ + +typedef float v4sf __attribute__((vector_size(16))); +/* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a */ + +v4sf +foo (v4sf a, v4sf b) +{ + return a / b; +} diff --git a/gcc/testsuite/gcc.target/riscv/pr118410-1.c b/gcc/testsuite/gcc.target/riscv/pr118410-1.c new file mode 100644 index 0000000..4a8b847 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr118410-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ +/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64} } } */ +/* { dg-options "-march=rv32gcb -mabi=ilp32" { target { rv32} } } */ + +long orlow(long x) { return x | ((1L << 24) - 1); } + +/* { dg-final { scan-assembler-times "orn\t" 1 } } */ +/* { dg-final { scan-assembler-not "addi\t" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/pr118410-2.c b/gcc/testsuite/gcc.target/riscv/pr118410-2.c new file mode 100644 index 0000000..b63a1d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr118410-2.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ +/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64} } } */ +/* { dg-options "-march=rv32gcb -mabi=ilp32" { target { rv32} } } */ + +long xorlow(long x) { return x ^ ((1L << 24) - 1); } + +/* { dg-final { scan-assembler-times "xnor\t" 1 } } */ +/* { dg-final { scan-assembler-not "addi\t" } } */ diff --git a/gcc/testsuite/rust/compile/nr2/compile.exp b/gcc/testsuite/rust/compile/nr2/compile.exp index 4d91dd0..9e15cdd 100644 --- a/gcc/testsuite/rust/compile/nr2/compile.exp +++ b/gcc/testsuite/rust/compile/nr2/compile.exp @@ -19,6 +19,15 @@ # Load support procs. load_lib rust-dg.exp +# These tests don't run runtest_file_p consistently if it +# doesn't return the same values, so disable parallelization +# of this *.exp file. The first parallel runtest to reach +# this will run all the tests serially. +if ![gcc_parallel_test_run_p compile] { + return +} +gcc_parallel_test_enable 0 + # Initialize `dg'. dg-init @@ -136,3 +145,5 @@ namespace eval rust-nr2-ns { # All done. dg-finish + +gcc_parallel_test_enable 1 |